sqlglot.generators.duckdb
from __future__ import annotations

from decimal import Decimal
from itertools import groupby
import re
import typing as t

from sqlglot import exp, generator, transforms

from sqlglot.dialects.dialect import (
    DATETIME_DELTA,
    JSON_EXTRACT_TYPE,
    approx_count_distinct_sql,
    array_append_sql,
    array_compact_sql,
    array_concat_sql,
    arrow_json_extract_sql,
    count_if_to_sum,
    date_delta_to_binary_interval_op,
    datestrtodate_sql,
    encode_decode_sql,
    explode_to_unnest_sql,
    generate_series_sql,
    getbit_sql,
    groupconcat_sql,
    inline_array_unless_query,
    months_between_sql,
    no_datetime_sql,
    no_comment_column_constraint_sql,
    no_make_interval_sql,
    no_time_sql,
    no_timestamp_sql,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestrtotime_sql,
    unit_to_str,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import is_date_unit, seq_get

# Regex to detect time zones in timestamps of the form [+|-]TT[:tt]
# The pattern matches timezone offsets that appear after the time portion
TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?")

# Characters that must be escaped when building regex expressions in INITCAP
REGEX_ESCAPE_REPLACEMENTS = {
    "\\": "\\\\",
    "-": r"\-",
    "^": r"\^",
    "[": r"\[",
    "]": r"\]",
}

# Used in RANDSTR transpilation
RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
RANDSTR_SEED = 123456

# Whitespace control characters that must be emitted as `CHR({val})` calls in DuckDB
WS_CONTROL_CHARS_TO_DUCK = {
    "\u000b": 11,
    "\u001c": 28,
    "\u001d": 29,
    "\u001e": 30,
    "\u001f": 31,
}

# Days of week to ISO 8601 day-of-week numbers
# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
WEEK_START_DAY_TO_DOW = {
    "MONDAY": 1,
    "TUESDAY": 2,
    "WEDNESDAY": 3,
    "THURSDAY": 4,
    "FRIDAY": 5,
    "SATURDAY": 6,
    "SUNDAY": 7,
}

MAX_BIT_POSITION = exp.Literal.number(32768)

# cs/as/ps are Snowflake defaults; DuckDB already behaves the same way, so they are safe to drop.
# Note: "as" is also a reserved keyword in DuckDB, making it impossible to pass through.
_SNOWFLAKE_COLLATION_DEFAULTS = frozenset({"cs", "as", "ps"})
_SNOWFLAKE_COLLATION_UNSUPPORTED = frozenset(
    {"ci", "ai", "upper", "lower", "utf8", "bin", "pi", "fl", "fu", "trim", "ltrim", "rtrim"}
)

# Window functions that support IGNORE/RESPECT NULLS in DuckDB
_IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
    exp.FirstValue,
    exp.Lag,
    exp.LastValue,
    exp.Lead,
    exp.NthValue,
)

# SEQ function constants
_SEQ_BASE: exp.Expr = exp.maybe_parse("(ROW_NUMBER() OVER (ORDER BY 1) - 1)")
_SEQ_RESTRICTED = (exp.Where, exp.Having, exp.AggFunc, exp.Order, exp.Select)
# Maps SEQ expression types to their byte width (suffix indicates bytes: SEQ1=1, SEQ2=2, etc.)
_SEQ_BYTE_WIDTH = {exp.Seq1: 1, exp.Seq2: 2, exp.Seq4: 4, exp.Seq8: 8}

# Templates for generating signed and unsigned SEQ values within a specified range
_SEQ_UNSIGNED: exp.Expr = exp.maybe_parse(":base % :max_val")
_SEQ_SIGNED: exp.Expr = exp.maybe_parse(
    "(CASE WHEN :base % :max_val >= :half "
    "THEN :base % :max_val - :max_val "
    "ELSE :base % :max_val END)"
)


def _apply_base64_alphabet_replacements(
    result: exp.Expr,
    alphabet: exp.Expr | None,
    reverse: bool = False,
) -> exp.Expr:
    """
    Apply base64 alphabet character replacements.

    A base64 alphabet can be 1-3 chars: 1st = index 62 ('+'), 2nd = index 63 ('/'), 3rd = padding ('=').
    zip truncates to the shorter string, so a 1-char alphabet only replaces '+', a 2-char one replaces '+/', etc.

    Args:
        result: The expression to apply replacements to.
        alphabet: Custom alphabet literal (expected chars for +/=).
        reverse: If False, replace default with custom (encode).
            If True, replace custom with default (decode).
    """
    if isinstance(alphabet, exp.Literal) and alphabet.is_string:
        for default_char, new_char in zip("+/=", alphabet.this):
            if new_char != default_char:
                find, replace = (new_char, default_char) if reverse else (default_char, new_char)
                result = exp.Replace(
                    this=result,
                    expression=exp.Literal.string(find),
                    replacement=exp.Literal.string(replace),
                )
    return result


def _base64_decode_sql(self: DuckDBGenerator, expression: exp.Expr, to_string: bool) -> str:
    """
    Transpile Snowflake BASE64_DECODE_STRING/BINARY to DuckDB.

    DuckDB uses FROM_BASE64() which returns BLOB. For string output, wrap with DECODE().
    Custom alphabets require REPLACE() calls to convert to standard base64.
    """
    input_expr = expression.this
    alphabet = expression.args.get("alphabet")

    # Handle a custom alphabet by replacing non-standard chars with standard ones
    input_expr = _apply_base64_alphabet_replacements(input_expr, alphabet, reverse=True)

    # FROM_BASE64 returns BLOB
    input_expr = exp.FromBase64(this=input_expr)

    if to_string:
        input_expr = exp.Decode(this=input_expr)

    return self.sql(input_expr)
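
# Illustrative sketch of the output shape (input value `x` is hypothetical): a Snowflake call
# like BASE64_DECODE_STRING(x, '$.') with the custom alphabet '$.' would come out roughly as
#   DECODE(FROM_BASE64(REPLACE(REPLACE(x, '$', '+'), '.', '/')))
# i.e. one REPLACE per remapped alphabet character, applied before decoding.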

def _last_day_sql(self: DuckDBGenerator, expression: exp.LastDay) -> str:
    """
    DuckDB's LAST_DAY only supports finding the last day of a month.
    For other date parts (year, quarter, week), we need to implement equivalent logic.
    """
    date_expr = expression.this
    unit = expression.text("unit")

    if not unit or unit.upper() == "MONTH":
        # Default behavior - use DuckDB's native LAST_DAY
        return self.func("LAST_DAY", date_expr)

    if unit.upper() == "YEAR":
        # Last day of year: December 31st of the same year
        year_expr = exp.func("EXTRACT", "YEAR", date_expr)
        make_date_expr = exp.func(
            "MAKE_DATE", year_expr, exp.Literal.number(12), exp.Literal.number(31)
        )
        return self.sql(make_date_expr)

    if unit.upper() == "QUARTER":
        # Last day of quarter
        year_expr = exp.func("EXTRACT", "YEAR", date_expr)
        quarter_expr = exp.func("EXTRACT", "QUARTER", date_expr)

        # Calculate the last month of the quarter: quarter * 3 (quarter can be 1 to 4)
        last_month_expr = exp.Mul(this=quarter_expr, expression=exp.Literal.number(3))
        first_day_last_month_expr = exp.func(
            "MAKE_DATE", year_expr, last_month_expr, exp.Literal.number(1)
        )

        # Last day of the last month of the quarter
        last_day_expr = exp.func("LAST_DAY", first_day_last_month_expr)
        return self.sql(last_day_expr)

    if unit.upper() == "WEEK":
        # DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6
        dow = exp.func("EXTRACT", "DAYOFWEEK", date_expr)
        # Days to the last day of the week: (7 - dayofweek) % 7, assuming the last day of the week is Sunday (Snowflake)
        # Wrap in parentheses to ensure correct precedence
        days_to_sunday_expr = exp.Mod(
            this=exp.Paren(this=exp.Sub(this=exp.Literal.number(7), expression=dow)),
            expression=exp.Literal.number(7),
        )
        interval_expr = exp.Interval(this=days_to_sunday_expr, unit=exp.var("DAY"))
        add_expr = exp.Add(this=date_expr, expression=interval_expr)
        cast_expr = exp.cast(add_expr, exp.DType.DATE)
        return self.sql(cast_expr)

    self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function")
    return self.function_fallback_sql(expression)
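
# Illustrative sketch of the QUARTER branch (column `d` is hypothetical):
#   LAST_DAY(d, QUARTER) -> roughly LAST_DAY(MAKE_DATE(EXTRACT(YEAR FROM d), EXTRACT(QUARTER FROM d) * 3, 1))
# i.e. the last day of the third month of d's quarter.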

def _is_nanosecond_unit(unit: exp.Expr | None) -> bool:
    return isinstance(unit, (exp.Var, exp.Literal)) and unit.name.upper() == "NANOSECOND"


def _handle_nanosecond_diff(
    self: DuckDBGenerator,
    end_time: exp.Expr,
    start_time: exp.Expr,
) -> str:
    """Generate a NANOSECOND diff using EPOCH_NS, since DATE_DIFF doesn't support it."""
    end_ns = exp.cast(end_time, exp.DType.TIMESTAMP_NS)
    start_ns = exp.cast(start_time, exp.DType.TIMESTAMP_NS)

    # Build expression tree: EPOCH_NS(end) - EPOCH_NS(start)
    return self.sql(
        exp.Sub(this=exp.func("EPOCH_NS", end_ns), expression=exp.func("EPOCH_NS", start_ns))
    )


def _to_boolean_sql(self: DuckDBGenerator, expression: exp.ToBoolean) -> str:
    """
    Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to their DuckDB equivalent.

    DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off',
    so we need to handle the 'on'/'off' cases explicitly.

    For TO_BOOLEAN (safe=False): NaN and INF values cause errors. We use DuckDB's native ERROR()
    function to replicate this behavior with a clear error message.

    For TRY_TO_BOOLEAN (safe=True): Use DuckDB's TRY_CAST for conversion, which returns NULL
    for invalid inputs instead of throwing errors.
    """
    arg = expression.this
    is_safe = expression.args.get("safe", False)

    base_case_expr = (
        exp.case()
        .when(
            # Handle 'on' -> TRUE (case insensitive)
            exp.Upper(this=exp.cast(arg, exp.DType.VARCHAR)).eq(exp.Literal.string("ON")),
            exp.true(),
        )
        .when(
            # Handle 'off' -> FALSE (case insensitive)
            exp.Upper(this=exp.cast(arg, exp.DType.VARCHAR)).eq(exp.Literal.string("OFF")),
            exp.false(),
        )
    )

    if is_safe:
        # TRY_TO_BOOLEAN: handle 'on'/'off' and use TRY_CAST for everything else
        case_expr = base_case_expr.else_(exp.func("TRY_CAST", arg, exp.DType.BOOLEAN.into_expr()))
    else:
        # TO_BOOLEAN: handle NaN/INF errors, 'on'/'off', and use a regular CAST
        cast_to_real = exp.func("TRY_CAST", arg, exp.DType.FLOAT.into_expr())

        # Check for NaN and INF values
        nan_inf_check = exp.Or(
            this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
        )

        case_expr = base_case_expr.when(
            nan_inf_check,
            exp.func(
                "ERROR",
                exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
            ),
        ).else_(exp.cast(arg, exp.DType.BOOLEAN))

    return self.sql(case_expr)
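
# Sketch of the generated shape for the safe variant (input `x` is hypothetical, simplified):
#   TRY_TO_BOOLEAN(x) ->
#     CASE
#       WHEN UPPER(CAST(x AS VARCHAR)) = 'ON' THEN TRUE
#       WHEN UPPER(CAST(x AS VARCHAR)) = 'OFF' THEN FALSE
#       ELSE TRY_CAST(x AS BOOLEAN)
#     END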

# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDBGenerator, expression: exp.Date) -> str:
    this = expression.this
    zone = self.sql(expression, "zone")

    if zone:
        # BigQuery considers "this" to be at UTC, converts it to the specified
        # time zone and then keeps only the DATE part.
        # To mimic that, we:
        # (1) Cast to TIMESTAMP to remove DuckDB's local tz
        # (2) Apply consecutive AtTimeZone calls for the UTC -> zone conversion
        this = exp.cast(this, exp.DType.TIMESTAMP)
        at_utc = exp.AtTimeZone(this=this, zone=exp.Literal.string("UTC"))
        this = exp.AtTimeZone(this=at_utc, zone=zone)

    return self.sql(exp.cast(expression=this, to=exp.DType.DATE))


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDBGenerator, expression: exp.TimeDiff) -> str:
    unit = expression.unit

    if _is_nanosecond_unit(unit):
        return _handle_nanosecond_diff(self, expression.expression, expression.this)

    this = exp.cast(expression.this, exp.DType.TIME)
    expr = exp.cast(expression.expression, exp.DType.TIME)

    # Although the two dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDBGenerator, DATETIME_DELTA], str]:
    """
    DuckDB override to handle:
    1. NANOSECOND operations (DuckDB doesn't support INTERVAL ... NANOSECOND)
    2. Float/decimal interval values (DuckDB INTERVAL requires integers)
    """
    base_impl = date_delta_to_binary_interval_op(cast=cast)

    def _duckdb_date_delta_sql(self: DuckDBGenerator, expression: DATETIME_DELTA) -> str:
        unit = expression.unit
        interval_value = expression.expression

        # Handle the NANOSECOND unit (DuckDB doesn't support INTERVAL ... NANOSECOND)
        if _is_nanosecond_unit(unit):
            if isinstance(interval_value, exp.Interval):
                interval_value = interval_value.this

            timestamp_ns = exp.cast(expression.this, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.func(
                    "MAKE_TIMESTAMP_NS",
                    exp.Add(this=exp.func("EPOCH_NS", timestamp_ns), expression=interval_value),
                )
            )

        # Handle float/decimal interval values, as DuckDB's INTERVAL requires integer expressions
        if not interval_value or isinstance(interval_value, exp.Interval):
            return base_impl(self, expression)

        if interval_value.is_type(*exp.DataType.REAL_TYPES):
            expression.set("expression", exp.cast(exp.func("ROUND", interval_value), "INT"))

        return base_impl(self, expression)

    return _duckdb_date_delta_sql
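
# Illustrative sketches of the two special cases (operands `ts` and `d` are hypothetical):
#   DATEADD(NANOSECOND, 5, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + 5)
#   DATEADD(DAY, 1.5, d)       -> roughly d + INTERVAL (CAST(ROUND(1.5) AS INT)) DAY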

def _array_insert_sql(self: DuckDBGenerator, expression: exp.ArrayInsert) -> str:
    """
    Transpile ARRAY_INSERT to DuckDB using LIST_CONCAT and slicing.

    Handles:
    - 0-based and 1-based indexing (normalizes to 0-based for calculations)
    - Negative position conversion (requires array length)
    - NULL propagation (source dialects return NULL, DuckDB creates a single-element array)
    - Assumes position is within bounds per user constraint

    Note: All dialects that support ARRAY_INSERT (Snowflake, Spark, Databricks) have
    ARRAY_FUNCS_PROPAGATES_NULLS=True, so we always assume the source propagates NULLs.

    Args:
        expression: The ArrayInsert expression to transpile.

    Returns:
        SQL string implementing ARRAY_INSERT behavior.
    """
    this = expression.this
    position = expression.args.get("position")
    element = expression.expression
    element_array = exp.Array(expressions=[element])
    index_offset = expression.args.get("offset", 0)

    if not position or not position.is_int:
        self.unsupported("ARRAY_INSERT can only be transpiled with a literal position")
        return self.func("ARRAY_INSERT", this, position, element)

    pos_value = position.to_py()

    # Normalize one-based indexing to zero-based for slice calculations
    # Spark (1-based) -> Snowflake (0-based):
    #   Positive: pos=1 -> pos=0 (subtract 1)
    #   Negative: pos=-2 -> pos=-1 (add 1)
    # Example: Spark array_insert([a,b,c], -2, d) -> [a,b,d,c] is the same as Snowflake pos=-1
    if pos_value > 0:
        pos_value = pos_value - index_offset
    elif pos_value < 0:
        pos_value = pos_value + index_offset

    # Build the appropriate list_concat expression based on position
    if pos_value == 0:
        # Insert at the beginning
        concat_exprs = [element_array, this]
    elif pos_value > 0:
        # Positive position: LIST_CONCAT(arr[1:pos], [elem], arr[pos+1:])
        # 0-based -> DuckDB 1-based slicing

        # left slice: arr[1:pos]
        slice_start = exp.Bracket(
            this=this,
            expressions=[
                exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
            ],
        )

        # right slice: arr[pos+1:]
        slice_end = exp.Bracket(
            this=this, expressions=[exp.Slice(this=exp.Literal.number(pos_value + 1))]
        )

        concat_exprs = [slice_start, element_array, slice_end]
    else:
        # Negative position: arr[1:LEN(arr)+pos], [elem], arr[LEN(arr)+pos+1:]
        # pos=-1 means insert before the last element
        arr_len = exp.Length(this=this)

        # Calculate the slice position: LEN(arr) + pos (e.g., LEN(arr) + (-1) = LEN(arr) - 1)
        slice_end_pos = arr_len + exp.Literal.number(pos_value)
        slice_start_pos = slice_end_pos + exp.Literal.number(1)

        # left slice: arr[1:LEN(arr)+pos]
        slice_start = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
        )

        # right slice: arr[LEN(arr)+pos+1:]
        slice_end = exp.Bracket(this=this, expressions=[exp.Slice(this=slice_start_pos)])

        concat_exprs = [slice_start, element_array, slice_end]

    # All dialects that support ARRAY_INSERT propagate NULLs (Snowflake/Spark/Databricks)
    # Wrap in CASE WHEN array IS NULL THEN NULL ELSE func_expr END
    return self.sql(
        exp.If(
            this=exp.Is(this=this, expression=exp.Null()),
            true=exp.Null(),
            false=self.func("LIST_CONCAT", *concat_exprs),
        )
    )
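
# Worked example of the positive-position branch (names are hypothetical, 0-based offset):
#   ARRAY_INSERT(arr, 1, d) -> roughly
#   CASE WHEN arr IS NULL THEN NULL ELSE LIST_CONCAT(arr[1 : 1], [d], arr[2 : ]) END
# so [a, b, c] becomes [a, d, b, c].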

def _array_remove_at_sql(self: DuckDBGenerator, expression: exp.ArrayRemoveAt) -> str:
    """
    Transpile ARRAY_REMOVE_AT to DuckDB using LIST_CONCAT and slicing.

    Handles:
    - Positive positions (0-based indexing)
    - Negative positions (from the end of the array)
    - NULL propagation (Snowflake returns NULL for a NULL array, DuckDB doesn't auto-propagate)
    - Only supports literal integer positions (non-literals remain untranspiled)

    Transpilation patterns:
    - pos=0 (first): arr[2:]
    - pos>0 (middle): LIST_CONCAT(arr[1:p], arr[p+2:])
    - pos=-1 (last): arr[1:LEN(arr)-1]
    - pos<-1: LIST_CONCAT(arr[1:LEN(arr)+p], arr[LEN(arr)+p+2:])

    All wrapped in: CASE WHEN arr IS NULL THEN NULL ELSE ... END

    Args:
        expression: The ArrayRemoveAt expression to transpile.

    Returns:
        SQL string implementing ARRAY_REMOVE_AT behavior.
    """
    this = expression.this
    position = expression.args.get("position")

    if not position or not position.is_int:
        self.unsupported("ARRAY_REMOVE_AT can only be transpiled with a literal position")
        return self.func("ARRAY_REMOVE_AT", this, position)

    pos_value = position.to_py()

    # Build the appropriate expression based on position
    if pos_value == 0:
        # Remove the first element: arr[2:]
        result_expr: exp.Expr | str = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(2))],
        )
    elif pos_value > 0:
        # Remove at a positive position: LIST_CONCAT(arr[1:pos], arr[pos+2:])
        # DuckDB uses 1-based slicing
        left_slice = exp.Bracket(
            this=this,
            expressions=[
                exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
            ],
        )
        right_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(pos_value + 2))],
        )
        result_expr = self.func("LIST_CONCAT", left_slice, right_slice)
    elif pos_value == -1:
        # Remove the last element: arr[1:LEN(arr)-1]
        # Optimization: simpler than the general negative case
        arr_len = exp.Length(this=this)
        slice_end = arr_len + exp.Literal.number(-1)
        result_expr = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end)],
        )
    else:
        # Remove at a negative position: LIST_CONCAT(arr[1:LEN(arr)+pos], arr[LEN(arr)+pos+2:])
        arr_len = exp.Length(this=this)
        slice_end_pos = arr_len + exp.Literal.number(pos_value)
        slice_start_pos = slice_end_pos + exp.Literal.number(2)

        left_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
        )
        right_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=slice_start_pos)],
        )
        result_expr = self.func("LIST_CONCAT", left_slice, right_slice)

    # Snowflake ARRAY_FUNCS_PROPAGATES_NULLS=True, so wrap in a NULL check:
    # CASE WHEN array IS NULL THEN NULL ELSE result_expr END
    return self.sql(
        exp.If(
            this=exp.Is(this=this, expression=exp.Null()),
            true=exp.Null(),
            false=result_expr,
        )
    )
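
# Worked example of the middle-position pattern (names are hypothetical):
#   ARRAY_REMOVE_AT(arr, 1) -> roughly
#   CASE WHEN arr IS NULL THEN NULL ELSE LIST_CONCAT(arr[1 : 1], arr[3 : ]) END
# so [a, b, c] becomes [a, c].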

@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDBGenerator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _array_contains_sql(self: DuckDBGenerator, expression: exp.ArrayContains) -> str:
    this = expression.this
    expr = expression.expression

    func = self.func("ARRAY_CONTAINS", this, expr)

    if expression.args.get("check_null"):
        check_null_in_array = exp.Nullif(
            this=exp.NEQ(this=exp.ArraySize(this=this), expression=exp.func("LIST_COUNT", this)),
            expression=exp.false(),
        )
        return self.sql(exp.If(this=expr.is_(exp.Null()), true=check_null_in_array, false=func))

    return func


def _array_overlaps_sql(self: DuckDBGenerator, expression: exp.ArrayOverlaps) -> str:
    """
    Translates Snowflake's NULL-safe ARRAYS_OVERLAP to DuckDB.

    DuckDB's native && operator is not NULL-safe: [1,NULL,3] && [NULL,4,5] returns FALSE.
    Snowflake returns TRUE when both arrays contain NULL (NULLs are treated as known values).

    Generated SQL: (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))

    ARRAY_LENGTH counts all elements (including NULLs); LIST_COUNT counts only non-NULLs.
    When they differ, the array contains at least one NULL, matching Snowflake's NULL-safe semantics.
    """
    if not expression.args.get("null_safe"):
        return self.binary(expression, "&&")

    arr1 = expression.this
    arr2 = expression.expression

    check_nulls = exp.and_(
        exp.NEQ(
            this=exp.ArraySize(this=arr1.copy()),
            expression=exp.func("LIST_COUNT", arr1.copy()),
        ),
        exp.NEQ(
            this=exp.ArraySize(this=arr2.copy()),
            expression=exp.func("LIST_COUNT", arr2.copy()),
        ),
        copy=False,
    )

    overlap = exp.ArrayOverlaps(this=arr1.copy(), expression=arr2.copy())

    return self.sql(
        exp.or_(
            exp.paren(overlap, copy=False),
            exp.paren(check_nulls, copy=False),
            copy=False,
            wrap=False,
        )
    )


def _struct_sql(self: DuckDBGenerator, expression: exp.Struct) -> str:
    ancestor_cast = expression.find_ancestor(exp.Cast, exp.Select)
    ancestor_cast = None if isinstance(ancestor_cast, exp.Select) else ancestor_cast

    # An empty struct cast works with MAP(), since DuckDB can't parse {}
    if not expression.expressions:
        if isinstance(ancestor_cast, exp.Cast) and ancestor_cast.to.is_type(exp.DType.MAP):
            return "MAP()"

    args: list[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB.
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DType.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        this = expr.this
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            if isinstance(this, exp.Identifier):
                key = self.sql(exp.Literal.string(expr.name))
            elif is_property_eq:
                key = self.sql(this)
            else:
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"
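
# Illustrative sketch of the two output shapes:
#   BigQuery STRUCT<a STRING, b INT64>('str', 1) -> roughly CAST(ROW('str', 1) AS STRUCT(a TEXT, b INT))
#   Keyed struct STRUCT(1 AS x)                  -> the DuckDB literal {'x': 1}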

def _datatype_sql(self: DuckDBGenerator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(exp.DType.TIME, exp.DType.TIMETZ, exp.DType.TIMESTAMPTZ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDBGenerator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _build_seq_expression(base: exp.Expr, byte_width: int, signed: bool) -> exp.Expr:
    """Build a SEQ expression with the given base, byte width, and signedness."""
    bits = byte_width * 8
    max_val = exp.Literal.number(2**bits)

    if signed:
        half = exp.Literal.number(2 ** (bits - 1))
        return exp.replace_placeholders(_SEQ_SIGNED.copy(), base=base, max_val=max_val, half=half)
    return exp.replace_placeholders(_SEQ_UNSIGNED.copy(), base=base, max_val=max_val)


def _seq_to_range_in_generator(expression: exp.Expr) -> exp.Expr:
    """
    Transform SEQ functions to `range` column references when inside a GENERATOR context.

    When GENERATOR(ROWCOUNT => N) becomes RANGE(N) in DuckDB, it produces a column
    named `range` with values 0, 1, ..., N-1. SEQ functions produce the same sequence,
    so we replace them with `range % max_val` to avoid nested window function issues.
    """
    if not isinstance(expression, exp.Select):
        return expression

    from_ = expression.args.get("from_")
    if not (
        from_
        and isinstance(from_.this, exp.TableFromRows)
        and isinstance(from_.this.this, exp.Generator)
    ):
        return expression

    def replace_seq(node: exp.Expr) -> exp.Expr:
        if isinstance(node, (exp.Seq1, exp.Seq2, exp.Seq4, exp.Seq8)):
            byte_width = _SEQ_BYTE_WIDTH[type(node)]
            return _build_seq_expression(exp.column("range"), byte_width, signed=node.name == "1")
        return node

    return expression.transform(replace_seq, copy=False)


def _seq_sql(self: DuckDBGenerator, expression: exp.Func, byte_width: int) -> str:
    """
    Transpile Snowflake SEQ1/SEQ2/SEQ4/SEQ8 to DuckDB.

    Generates monotonically increasing integers starting from 0.
    The signed parameter (0 or 1) affects the wrap-around behavior:
    - Unsigned (0): wraps at 2^(bits) - 1
    - Signed (1): wraps at 2^(bits-1) - 1, then goes negative
    """
    # Warn if SEQ is in a restricted context (Select stops the search at the current scope)
    ancestor = expression.find_ancestor(*_SEQ_RESTRICTED)
    if ancestor and (
        (not isinstance(ancestor, (exp.Order, exp.Select)))
        or (isinstance(ancestor, exp.Order) and isinstance(ancestor.parent, exp.Window))
    ):
        self.unsupported("SEQ in restricted context is not supported - use CTE or subquery")

    result = _build_seq_expression(_SEQ_BASE.copy(), byte_width, signed=expression.name == "1")
    return self.sql(result)
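
# Illustrative sketch: an unsigned SEQ1() (byte width 1, so 2^8 = 256) becomes roughly
#   (ROW_NUMBER() OVER (ORDER BY 1) - 1) % 256
# while the signed variant wraps into the negative range via the _SEQ_SIGNED CASE template.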

def _unix_to_time_sql(self: DuckDBGenerator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this
    target_type = expression.args.get("target_type")

    # Check if we need NTZ (naive timestamp in UTC)
    is_ntz = target_type and target_type.this in (
        exp.DType.TIMESTAMP,
        exp.DType.TIMESTAMPNTZ,
    )

    if scale == exp.UnixToTime.MILLIS:
        # EPOCH_MS already returns TIMESTAMP (naive, UTC)
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        # MAKE_TIMESTAMP already returns TIMESTAMP (naive, UTC)
        return self.func("MAKE_TIMESTAMP", timestamp)

    # Other scales: divide and use TO_TIMESTAMP
    if scale not in (None, exp.UnixToTime.SECONDS):
        timestamp = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))

    to_timestamp: exp.Expr = exp.Anonymous(this="TO_TIMESTAMP", expressions=[timestamp])

    if is_ntz:
        to_timestamp = exp.AtTimeZone(this=to_timestamp, zone=exp.Literal.string("UTC"))

    return self.sql(to_timestamp)


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In, exp.Not)


def _arrow_json_extract_sql(self: DuckDBGenerator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: exp.Expr | None, type: exp.DType = exp.DType.DATE
) -> exp.Expr | None:
    if isinstance(arg, exp.Literal) and arg.is_string:
        ts = arg.name
        if type == exp.DType.DATE and ":" in ts:
            type = exp.DType.TIMESTAMPTZ if TIMEZONE_PATTERN.search(ts) else exp.DType.TIMESTAMP

        arg = exp.cast(arg, type)

    return arg


def _week_unit_to_dow(unit: exp.Expr | None) -> int | None:
    """
    Compute the Monday-based day shift to align DATE_DIFF('WEEK', ...) coming
    from other dialects, e.g. BigQuery's WEEK(<day>) or ISOWEEK unit parts.

    Args:
        unit: The unit expression (Var for ISOWEEK or WeekStart)

    Returns:
        The ISO 8601 day number (Monday=1, ..., Sunday=7), or None if it's not a week
        unit or the day is dynamic (not a constant).

    Examples:
        "WEEK(SUNDAY)" -> 7
        "WEEK(MONDAY)" -> 1
        "ISOWEEK" -> 1
    """
    # Handle plain Var expressions for ISOWEEK only
    if isinstance(unit, exp.Var) and unit.name.upper() == "ISOWEEK":
        return 1

    # Handle WeekStart expressions with an explicit day
    if isinstance(unit, exp.WeekStart):
        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())

    return None


def _build_week_trunc_expression(
    date_expr: exp.Expr,
    start_dow: int,
    preserve_start_day: bool = False,
) -> exp.Expr:
    """
    Build a DATE_TRUNC expression for week boundaries with a custom start day.

    DuckDB's DATE_TRUNC('WEEK', ...) always returns Monday. To align to a different
    start day, we shift the date before truncating.

    Args:
        date_expr: The date expression to truncate.
        start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7).
        preserve_start_day: If True, reverse the shift after truncating so the result lands on the
            correct week start day. Needed for DATE_TRUNC (the absolute result matters) but
            not for DATE_DIFF (only relative alignment matters).

    Shift formula: Sunday (7) gets +1, others get (1 - start_dow).
    """
    shift_days = 1 if start_dow == 7 else 1 - start_dow
    truncated = exp.func("DATE_TRUNC", unit=exp.var("WEEK"), this=date_expr)

    if shift_days == 0:
        return truncated

    shift = exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY"))
    shifted_date = exp.DateAdd(this=date_expr, expression=shift)
    truncated.set("this", shifted_date)

    if preserve_start_day:
        interval = exp.Interval(this=exp.Literal.string(str(-shift_days)), unit=exp.var("DAY"))
        return exp.cast(
            exp.DateAdd(this=truncated, expression=interval), to=exp.DType.DATE, copy=False
        )

    return truncated


def _date_diff_sql(self: DuckDBGenerator, expression: exp.DateDiff | exp.DatetimeDiff) -> str:
    unit = expression.unit

    if _is_nanosecond_unit(unit):
        return _handle_nanosecond_diff(self, expression.this, expression.expression)

    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    # DuckDB's WEEK diff does not respect Monday crossings (week boundaries); it checks (end_day - start_day) / 7:
    #   SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
    #   SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
    # Whereas for other units such as MONTH it does respect month boundaries:
    #   SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (month crossed)
    date_part_boundary = expression.args.get("date_part_boundary")

    # Extract the week start day; returns None if the day is dynamic (column/placeholder)
    week_start = _week_unit_to_dow(unit)
    if date_part_boundary and week_start and this and expr:
        expression.set("unit", exp.Literal.string("WEEK"))

        # Truncate both dates to week boundaries to respect the input dialect's semantics
        this = _build_week_trunc_expression(this, week_start)
        expr = _build_week_trunc_expression(expr, week_start)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
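
# Illustrative sketch (operands `a` and `b` are hypothetical): a Sunday-start WEEK diff shifts
# both operands one day forward before truncating, so the output looks roughly like
#   DATE_DIFF('WEEK', DATE_TRUNC('WEEK', a + INTERVAL '1' DAY), DATE_TRUNC('WEEK', b + INTERVAL '1' DAY))
# which makes boundary crossings count against the Sunday-based week instead of DuckDB's Monday.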

def _generate_datetime_array_sql(
    self: DuckDBGenerator, expression: exp.GenerateDateArray | exp.GenerateTimestampArray
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DType.DATE if is_generate_date_array else exp.DType.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: exp.GenerateSeries | exp.Cast = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is a TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to a DATE array
        gen_series = exp.cast(gen_series, exp.DataType.from_str("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDBGenerator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.from_str(data_type)))


def _cast_to_varchar(arg: exp.Expr | None) -> exp.Expr | None:
    if arg and arg.type and not arg.is_type(*exp.DataType.TEXT_TYPES, exp.DType.UNKNOWN):
        return exp.cast(arg, exp.DType.VARCHAR)
    return arg


def _cast_to_boolean(arg: exp.Expr | None) -> exp.Expr | None:
    if arg and not arg.is_type(exp.DType.BOOLEAN):
        return exp.cast(arg, exp.DType.BOOLEAN)
    return arg


def _is_binary(arg: exp.Expr) -> bool:
    return arg.is_type(
        exp.DType.BINARY,
        exp.DType.VARBINARY,
        exp.DType.BLOB,
    )


def _gen_with_cast_to_blob(self: DuckDBGenerator, expression: exp.Expr, result_sql: str) -> str:
    if _is_binary(expression):
        blob = exp.DataType.from_str("BLOB", dialect="duckdb")
        result_sql = self.sql(exp.Cast(this=result_sql, to=blob))
    return result_sql


def _cast_to_bit(arg: exp.Expr) -> exp.Expr:
    if not _is_binary(arg):
        return arg

    if isinstance(arg, exp.HexString):
        arg = exp.Unhex(this=exp.Literal.string(arg.this))

    return exp.cast(arg, exp.DType.BIT)


def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
    if _is_binary(expression.this):
        expression.set("this", _cast_to_bit(expression.this))
    if _is_binary(expression.expression):
        expression.set("expression", _cast_to_bit(expression.expression))


def _day_navigation_sql(self: DuckDBGenerator, expression: exp.NextDay | exp.PreviousDay) -> str:
    """
    Transpile Snowflake's NEXT_DAY / PREVIOUS_DAY to DuckDB using date arithmetic.

    Returns the DATE of the next/previous occurrence of the specified weekday.

    Formulas:
    - NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
    - PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1

    Supports both literal and non-literal day names:
    - Literal: direct lookup (e.g., 'Monday' -> 1)
    - Non-literal: CASE statement for runtime evaluation

    Examples:
        NEXT_DAY('2024-01-01' (Monday), 'Monday')
            -> (1 - 1 + 6) % 7 + 1 = 6 % 7 + 1 = 7 days -> 2024-01-08

        PREVIOUS_DAY('2024-01-15' (Monday), 'Friday')
            -> (1 - 5 + 6) % 7 + 1 = 2 % 7 + 1 = 3 days -> 2024-01-12
    """
    date_expr = expression.this
    day_name_expr = expression.expression

    # Build the ISODOW call for the current day of week
    isodow_call = exp.func("ISODOW", date_expr)

    # Determine the target day of week
    if isinstance(day_name_expr, exp.Literal):
        # Literal day name: look up target_dow directly
        day_name_str = day_name_expr.name.upper()
        matching_day = next(
            (day for day in WEEK_START_DAY_TO_DOW if day.startswith(day_name_str)), None
        )
        if matching_day:
            target_dow: exp.Expr = exp.Literal.number(WEEK_START_DAY_TO_DOW[matching_day])
        else:
            # Unrecognized day name, use the fallback
            return self.function_fallback_sql(expression)
    else:
        # Non-literal day name: build a CASE statement for runtime mapping
        upper_day_name = exp.Upper(this=day_name_expr)
        target_dow = exp.Case(
            ifs=[
                exp.If(
                    this=exp.func(
                        "STARTS_WITH", upper_day_name.copy(), exp.Literal.string(day[:2])
                    ),
                    true=exp.Literal.number(dow_num),
                )
                for day, dow_num in WEEK_START_DAY_TO_DOW.items()
            ]
        )

    # Calculate the days offset and apply the interval based on direction
    if isinstance(expression, exp.NextDay):
        # NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
        days_offset = exp.paren(target_dow - isodow_call + 6, copy=False) % 7 + 1
        date_with_offset = date_expr + exp.Interval(this=days_offset, unit=exp.var("DAY"))
    else:  # exp.PreviousDay
        # PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1
        days_offset = exp.paren(isodow_call - target_dow + 6, copy=False) % 7 + 1
        date_with_offset = date_expr - exp.Interval(this=days_offset, unit=exp.var("DAY"))

    # Build the final result: CAST(date_with_offset AS DATE)
    return self.sql(exp.cast(date_with_offset, exp.DType.DATE))
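
# Illustrative sketch of the literal case (column `d` is hypothetical): NEXT_DAY(d, 'Friday')
# looks up Friday=5 and renders roughly as
#   CAST(d + INTERVAL ((5 - ISODOW(d) + 6) % 7 + 1) DAY AS DATE)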

def _anyvalue_sql(self: DuckDBGenerator, expression: exp.AnyValue) -> str:
    # Transform ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL
    having = expression.this
    if isinstance(having, exp.HavingMax):
        func_name = "ARG_MAX_NULL" if having.args.get("max") else "ARG_MIN_NULL"
        return self.func(func_name, having.this, having.expression)
    return self.function_fallback_sql(expression)


def _bitwise_agg_sql(
    self: DuckDBGenerator,
    expression: exp.BitwiseOrAgg | exp.BitwiseAndAgg | exp.BitwiseXorAgg,
) -> str:
    """
    DuckDB's bitwise aggregate functions only accept integer types. For other types:
    - DECIMAL/STRING: use CAST(arg AS INT), which rounds to the nearest integer
    - FLOAT/DOUBLE: use ROUND(arg)::INT to round to the nearest integer first, which is
      required due to float precision loss
    """
    if isinstance(expression, exp.BitwiseOrAgg):
        func_name = "BIT_OR"
    elif isinstance(expression, exp.BitwiseAndAgg):
        func_name = "BIT_AND"
    else:  # exp.BitwiseXorAgg
        func_name = "BIT_XOR"

    arg = expression.this

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.REAL_TYPES, *exp.DataType.TEXT_TYPES):
        if arg.is_type(*exp.DataType.FLOAT_TYPES):
            # Float types need to be rounded first due to precision loss
            arg = exp.func("ROUND", arg)

        arg = exp.cast(arg, exp.DType.INT)

    return self.func(func_name, arg)
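
# Illustrative sketch (column `f` is hypothetical): a bitwise OR aggregate over a DOUBLE column
# becomes roughly BIT_OR(CAST(ROUND(f) AS INT)), while integer inputs pass through unchanged.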

def _literal_sql_with_ws_chr(self: DuckDBGenerator, literal: str) -> str:
    # DuckDB does not support \uXXXX escapes, so we must use CHR() instead of replacing them directly
    if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
        return self.sql(exp.Literal.string(literal))

    sql_segments: list[str] = []
    for is_ws_control, group in groupby(literal, key=lambda ch: ch in WS_CONTROL_CHARS_TO_DUCK):
        if is_ws_control:
            for ch in group:
                duckdb_char_code = WS_CONTROL_CHARS_TO_DUCK[ch]
                sql_segments.append(self.func("CHR", exp.Literal.number(str(duckdb_char_code))))
        else:
            sql_segments.append(self.sql(exp.Literal.string("".join(group))))

    sql = " || ".join(sql_segments)
    return sql if len(sql_segments) == 1 else f"({sql})"


def _escape_regex_metachars(
    self: DuckDBGenerator, delimiters: exp.Expr | None, delimiters_sql: str
) -> str:
    r"""
    Escapes the regex metacharacters \ - ^ [ ] for use in character class regex expressions.

    Literal strings are escaped at transpile time; other expressions are handled with REPLACE() calls.
    """
    if not delimiters:
        return delimiters_sql

    if delimiters.is_string:
        literal_value = delimiters.this
        escaped_literal = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in literal_value)
        return _literal_sql_with_ws_chr(self, escaped_literal)

    escaped_sql = delimiters_sql
    for raw, escaped in REGEX_ESCAPE_REPLACEMENTS.items():
        escaped_sql = self.func(
            "REPLACE",
            escaped_sql,
            self.sql(exp.Literal.string(raw)),
            self.sql(exp.Literal.string(escaped)),
        )

    return escaped_sql


def _build_capitalization_sql(
    self: DuckDBGenerator,
    value_to_split: str,
    delimiters_sql: str,
) -> str:
    # Empty string delimiter --> treat the value as one word, no need to split
    if delimiters_sql == "''":
        return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"

    delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
    split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"

    # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
    # We do not know whether the first segment is a delimiter or not, so we check the first character of the string
    # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
    return self.func(
        "ARRAY_TO_STRING",
        exp.case()
        .when(
            f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {delim_regex_sql})",
            self.func(
                "LIST_TRANSFORM",
                self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
                "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
            ),
        )
        .else_(
            self.func(
                "LIST_TRANSFORM",
                self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
                "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
            ),
        ),
        "''",
    )


def _initcap_sql(self: DuckDBGenerator, expression: exp.Initcap) -> str:
    this_sql = self.sql(expression, "this")
    delimiters = expression.args.get("expression")
    if delimiters is None:
        # Fallback for a manually created exp.Initcap without a delimiters arg
        delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)
    delimiters_sql = self.sql(delimiters)

    escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql)

    return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql)
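
# Illustrative sketch (assuming '-' is among the delimiter characters): INITCAP('john-doe') is
# rendered as a REGEXP_EXTRACT_ALL + LIST_TRANSFORM pipeline that uppercases the first letter
# and lowercases the rest of every non-delimiter segment, yielding 'John-Doe'.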

def _boolxor_agg_sql(self: DuckDBGenerator, expression: exp.BoolxorAgg) -> str:
    """
    Snowflake's `BOOLXOR_AGG(col)` returns TRUE if exactly one input in `col` is TRUE, FALSE otherwise.
    Since DuckDB does not have an equivalent function, we mimic the behavior by generating `COUNT_IF(col) = 1`.

    DuckDB's COUNT_IF strictly requires boolean inputs, so we cast if not already boolean.
    """
    return self.sql(
        exp.EQ(
            this=exp.CountIf(this=_cast_to_boolean(expression.this)),
            expression=exp.Literal.number(1),
        )
    )


def _bitshift_sql(
    self: DuckDBGenerator, expression: exp.BitwiseLeftShift | exp.BitwiseRightShift
) -> str:
    """
    Transform bitshift expressions for DuckDB by injecting BIT/INT128 casts.

    DuckDB's bitwise shift operators don't work with BLOB/BINARY types, so we cast
    them to BIT for the operation, then cast the result back to the original type.

    Note: Assumes type annotation has been applied with the source dialect.
    """
    operator = "<<" if isinstance(expression, exp.BitwiseLeftShift) else ">>"
    result_is_blob = False
    this = expression.this

    if _is_binary(this):
        result_is_blob = True
        expression.set("this", exp.cast(this, exp.DType.BIT))
    elif expression.args.get("requires_int128"):
        this.replace(exp.cast(this, exp.DType.INT128))

    result_sql = self.binary(expression, operator)

    # Wrap in parentheses if the parent is a bitwise operator to "fix" a DuckDB precedence issue:
    # DuckDB parses a << b | c << d as (a << b | c) << d
    if isinstance(expression.parent, exp.Binary):
        result_sql = self.sql(exp.Paren(this=result_sql))

    if result_is_blob:
        result_sql = self.sql(
            exp.Cast(this=result_sql, to=exp.DataType.from_str("BLOB", dialect="duckdb"))
        )

    return result_sql


def _scale_rounding_sql(
    self: DuckDBGenerator,
    expression: exp.Expr,
    rounding_func: t.Type[exp.Expr],
) -> str | None:
    """
    Handle the scale parameter transformation for rounding functions.

    DuckDB doesn't support the scale parameter for certain functions (e.g., FLOOR, CEIL),
    so we transform FUNC(x, n) to ROUND(FUNC(x * 10^n) / 10^n, n).

    Args:
        self: The DuckDB generator instance
        expression: The expression to transform (must have 'this', 'decimals', and 'to' args)
        rounding_func: The rounding function class to use in the transformation

    Returns:
        The transformed SQL string if the decimals parameter exists, None otherwise
    """
    decimals = expression.args.get("decimals")

    if decimals is None or expression.args.get("to") is not None:
        return None

    this = expression.this
    if isinstance(this, exp.Binary):
        this = exp.Paren(this=this)

    n_int = decimals
    if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
        n_int = exp.cast(decimals, exp.DType.INT)

    pow_ = exp.Pow(this=exp.Literal.number("10"), expression=n_int)
    rounded = rounding_func(this=exp.Mul(this=this, expression=pow_))
    result = exp.Div(this=rounded, expression=pow_.copy())

    return self.round_sql(
        exp.Round(this=result, decimals=decimals, casts_non_integer_decimals=True)
    )


def _ceil_floor(self: DuckDBGenerator, expression: exp.Floor | exp.Ceil) -> str:
    scaled_sql = _scale_rounding_sql(self, expression, type(expression))
    if scaled_sql is not None:
        return scaled_sql
    return self.ceil_floor(expression)
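
# Illustrative sketch (column `x` is hypothetical; the exact power syntax depends on how
# exp.Pow is rendered): FLOOR(x, 2) becomes roughly
#   ROUND(FLOOR(x * POW(10, 2)) / POW(10, 2), 2)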

def _regr_val_sql(
    self: DuckDBGenerator,
    expression: exp.RegrValx | exp.RegrValy,
) -> str:
    """
    Transpile Snowflake's REGR_VALX/REGR_VALY to a DuckDB equivalent.

    REGR_VALX(y, x) returns NULL if y is NULL; otherwise it returns x.
    REGR_VALY(y, x) returns NULL if x is NULL; otherwise it returns y.
    """
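# (See the illustrative sketch after the function body below.)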
    from sqlglot.optimizer.annotate_types import annotate_types

    y = expression.this
    x = expression.expression

    # Determine which argument to check for NULL and which to return based on the expression type
    if isinstance(expression, exp.RegrValx):
        # REGR_VALX: check y for NULL, return x
        check_for_null = y
        return_value = x
        return_value_attr = "expression"
    else:
        # REGR_VALY: check x for NULL, return y
        check_for_null = x
        return_value = y
        return_value_attr = "this"

    # Get the type from the return argument
    result_type = return_value.type

    # If there's no type info, annotate the expression to infer types
    if not result_type or result_type.this == exp.DType.UNKNOWN:
        try:
            annotated = annotate_types(expression.copy(), dialect=self.dialect)
            result_type = getattr(annotated, return_value_attr).type
        except Exception:
            pass

    # Default to DOUBLE for regression functions if the type is still unknown
    if not result_type or result_type.this == exp.DType.UNKNOWN:
        result_type = exp.DType.DOUBLE.into_expr()

    # Cast NULL to the same type as return_value to avoid DuckDB type inference issues
    typed_null = exp.Cast(this=exp.Null(), to=result_type)

    return self.sql(
        exp.If(
            this=exp.Is(this=check_for_null.copy(), expression=exp.Null()),
            true=typed_null,
            false=return_value.copy(),
        )
    )
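
# Illustrative sketch (columns `y` and `x` of type DOUBLE are hypothetical):
#   REGR_VALX(y, x) -> CASE WHEN y IS NULL THEN CAST(NULL AS DOUBLE) ELSE x END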

def _maybe_corr_null_to_false(
    expression: exp.Filter | exp.Window | exp.Corr,
) -> exp.Filter | exp.Window | exp.Corr | None:
    corr = expression
    while isinstance(corr, (exp.Window, exp.Filter)):
        corr = corr.this

    if not isinstance(corr, exp.Corr) or not corr.args.get("null_on_zero_variance"):
        return None

    corr.set("null_on_zero_variance", False)
    return expression


def _date_from_parts_sql(self: DuckDBGenerator, expression: exp.DateFromParts) -> str:
    """
    Snowflake's DATE_FROM_PARTS allows out-of-range values for the month and day inputs,
    e.g. larger values (month=13, day=100), zero values (month=0, day=0) and negative
    values (month=-13, day=-100).

    DuckDB's MAKE_DATE does not support out-of-range values, but DuckDB's INTERVAL type does.

    We convert to date arithmetic:
        DATE_FROM_PARTS(year, month, day)
        -> MAKE_DATE(year, 1, 1) + INTERVAL (month-1) MONTH + INTERVAL (day-1) DAY
    """
    year_expr = expression.args.get("year")
    month_expr = expression.args.get("month")
    day_expr = expression.args.get("day")

    if expression.args.get("allow_overflow"):
        base_date: exp.Expr = exp.func(
            "MAKE_DATE", year_expr, exp.Literal.number(1), exp.Literal.number(1)
        )

        if month_expr:
            base_date = base_date + exp.Interval(this=month_expr - 1, unit=exp.var("MONTH"))

        if day_expr:
            base_date = base_date + exp.Interval(this=day_expr - 1, unit=exp.var("DAY"))

        return self.sql(exp.cast(expression=base_date, to=exp.DType.DATE))

    return self.func("MAKE_DATE", year_expr, month_expr, day_expr)


def _round_arg(arg: exp.Expr, round_input: bool | None = None) -> exp.Expr:
    if round_input:
        return exp.func("ROUND", arg, exp.Literal.number(0))
    return arg


def _boolnot_sql(self: DuckDBGenerator, expression: exp.Boolnot) -> str:
    arg = _round_arg(expression.this, expression.args.get("round_input"))
    return self.sql(exp.not_(exp.paren(arg)))


def _booland_sql(self: DuckDBGenerator, expression: exp.Booland) -> str:
    round_input = expression.args.get("round_input")
    left = _round_arg(expression.this, round_input)
    right = _round_arg(expression.expression, round_input)
    return self.sql(exp.paren(exp.and_(exp.paren(left), exp.paren(right), wrap=False)))


def _boolor_sql(self: DuckDBGenerator, expression: exp.Boolor) -> str:
    round_input = expression.args.get("round_input")
    left = _round_arg(expression.this, round_input)
    right = _round_arg(expression.expression, round_input)
    return self.sql(exp.paren(exp.or_(exp.paren(left), exp.paren(right), wrap=False)))


def _xor_sql(self: DuckDBGenerator, expression: exp.Xor) -> str:
    round_input = expression.args.get("round_input")
    left = _round_arg(expression.this, round_input)
    right = _round_arg(expression.expression, round_input)
    return self.sql(
        exp.or_(
            exp.paren(exp.and_(left.copy(), exp.paren(right.not_()), wrap=False)),
            exp.paren(exp.and_(exp.paren(left.not_()), right.copy(), wrap=False)),
            wrap=False,
        )
    )


def _explode_to_unnest_sql(self: DuckDBGenerator, expression: exp.Lateral) -> str:
    """Handle LATERAL VIEW EXPLODE/INLINE conversion to UNNEST for DuckDB."""
    explode = expression.this

    if isinstance(explode, exp.Inline):
        # For INLINE, create CROSS JOIN LATERAL (SELECT UNNEST(..., max_depth => 2))
        # Build the UNNEST call with a DuckDB-style named parameter
        unnest_expr = exp.Unnest(
            expressions=[
                explode.this,
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)),
            ]
        )
        select_expr = exp.Select(expressions=[unnest_expr]).subquery()

        alias_expr = expression.args.get("alias")
        if alias_expr and not alias_expr.this:
            # We need to provide a table name if not present
            alias_expr.set("this", exp.to_identifier(f"_u_{expression.index}"))

        transformed_lateral_expr = exp.Lateral(this=select_expr, alias=alias_expr)
        cross_join_lateral_expr = exp.Join(this=transformed_lateral_expr, kind="CROSS")

        return self.sql(cross_join_lateral_expr)

    # For other cases, use the standard conversion
    return explode_to_unnest_sql(self, expression)


def _sha_sql(
    self: DuckDBGenerator,
    expression: exp.Expr,
    hash_func: str,
    is_binary: bool = False,
) -> str:
    arg = expression.this

    # For SHA2 variants, check the digest length (DuckDB only supports SHA256)
    if hash_func == "SHA256":
        length = expression.text("length") or "256"
        if length != "256":
            self.unsupported("DuckDB only supports SHA256 hashing algorithm.")

    # Cast if the type is incompatible with DuckDB
    if (
        arg.type
        and arg.type.this != exp.DType.UNKNOWN
        and not arg.is_type(*exp.DataType.TEXT_TYPES)
        and not _is_binary(arg)
    ):
        arg = exp.cast(arg, exp.DType.VARCHAR)

    result = self.func(hash_func, arg)
    return self.func("UNHEX", result) if is_binary else result
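
# Illustrative sketches (column `col` is hypothetical):
#   DATE_FROM_PARTS(2024, 13, 1) with overflow allowed ->
#     CAST(MAKE_DATE(2024, 1, 1) + INTERVAL (13 - 1) MONTH + INTERVAL (1 - 1) DAY AS DATE)  -- 2025-01-01
#   SHA2(col, 256) -> SHA256(col); the binary digest variants wrap it, e.g. UNHEX(SHA256(col))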

class DuckDBGenerator(generator.Generator):
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    IGNORE_NULLS_BEFORE_ORDER = False
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    SELECT_KINDS: tuple[str, ...] = ()
    SUPPORTS_DECODE_CASE = False
    SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False

    AFTER_HAVING_MODIFIER_TRANSFORMS = generator.AFTER_HAVING_MODIFIER_TRANSFORMS
    SUPPORTS_WINDOW_EXCLUDE = True
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_SIZE_DIM_REQUIRED: bool | None = False
    NORMALIZE_EXTRACT_DATE_PARTS = True
    SUPPORTS_LIKE_QUANTIFIERS = False
    SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: _anyvalue_sql,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Boolnot: _boolnot_sql,
        exp.Booland: _booland_sql,
        exp.Boolor: _boolor_sql,
        exp.Array: transforms.preprocess(
            [transforms.inherit_struct_field_names],
            generator=inline_array_unless_query,
        ),
        exp.ArrayAppend: array_append_sql("LIST_APPEND"),
        exp.ArrayCompact: array_compact_sql,
        exp.ArrayConstructCompact: lambda self, e: self.sql(
            exp.ArrayCompact(this=exp.Array(expressions=e.expressions))
        ),
        exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayOverlaps: _array_overlaps_sql,
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArrayInsert: _array_insert_sql,
        exp.ArrayPosition: lambda self, e: (
            self.sql(
                exp.Sub(
                    this=exp.ArrayPosition(this=e.this, expression=e.expression),
                    expression=exp.Literal.number(1),
                )
            )
            if e.args.get("zero_based")
            else self.func("ARRAY_POSITION", e.this, e.expression)
        ),
        exp.ArrayRemoveAt: _array_remove_at_sql,
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySort: _array_sort_sql,
        exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.ArrayMax: rename_func("LIST_MAX"),
        exp.ArrayMin: rename_func("LIST_MIN"),
        exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False),
        exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True),
        exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
        exp.BitwiseAndAgg: _bitwise_agg_sql,
        exp.BitwiseCount: rename_func("BIT_COUNT"),
        exp.BitwiseLeftShift: _bitshift_sql,
        exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
        exp.BitwiseOrAgg: _bitwise_agg_sql,
        exp.BitwiseRightShift: _bitshift_sql,
        exp.BitwiseXorAgg: _bitwise_agg_sql,
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.Corr: lambda self, e: self._corr_sql(e),
        exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentSchemas: lambda self, e: self.func(
            "current_schemas", e.this if e.this else exp.true()
        ),
        exp.CurrentTimestamp: lambda self, e: (
            self.sql(
                exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
            )
            if e.args.get("sysdate")
            else "CURRENT_TIMESTAMP"
        ),
        exp.CurrentVersion: rename_func("version"),
        exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"),
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Dayname: lambda self, e: (
            self.func("STRFTIME", e.this, exp.Literal.string("%a"))
            if e.args.get("abbreviated")
            else self.func("DAYNAME", e.this)
        ),
        exp.Monthname: lambda self, e: (
            self.func("STRFTIME", e.this, exp.Literal.string("%b"))
            if e.args.get("abbreviated")
            else self.func("MONTHNAME", e.this)
        ),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_to_binary_interval_op(),
        exp.DateFromParts: _date_from_parts_sql,
        exp.DateSub: _date_delta_to_binary_interval_op(),
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeDiff: _date_diff_sql,
        exp.DatetimeSub: _date_delta_to_binary_interval_op(),
        exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
        exp.DateToDi: lambda self, e: (
            f"CAST(STRFTIME({self.sql(e, 'this')}, {self.dialect.DATEINT_FORMAT}) AS INT)"
        ),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.HexDecodeString: lambda self, e: self.sql(exp.Decode(this=exp.Unhex(this=e.this))),
        exp.DiToDate: lambda self, e: (
            f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {self.dialect.DATEINT_FORMAT}) AS DATE)"
        ),
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.EqualNull: lambda self, e: self.sql(
            exp.NullSafeEQ(this=e.this, expression=e.expression)
        ),
        exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateSeries: generate_series_sql("GENERATE_SERIES", "RANGE"),
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Getbit: getbit_sql,
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
        exp.Explode: rename_func("UNNEST"),
        exp.IcebergProperty: lambda *_: "",
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
exp.IsNan: rename_func("ISNAN"), 1603 exp.IsNullValue: lambda self, e: self.sql( 1604 exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL")) 1605 ), 1606 exp.IsArray: lambda self, e: self.sql( 1607 exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY")) 1608 ), 1609 exp.Ceil: _ceil_floor, 1610 exp.Floor: _ceil_floor, 1611 exp.JSONBExists: rename_func("JSON_EXISTS"), 1612 exp.JSONExtract: _arrow_json_extract_sql, 1613 exp.JSONExtractArray: _json_extract_value_array_sql, 1614 exp.JSONFormat: _json_format_sql, 1615 exp.JSONValueArray: _json_extract_value_array_sql, 1616 exp.Lateral: _explode_to_unnest_sql, 1617 exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)), 1618 exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)), 1619 exp.Select: transforms.preprocess([_seq_to_range_in_generator]), 1620 exp.Seq1: lambda self, e: _seq_sql(self, e, 1), 1621 exp.Seq2: lambda self, e: _seq_sql(self, e, 2), 1622 exp.Seq4: lambda self, e: _seq_sql(self, e, 4), 1623 exp.Seq8: lambda self, e: _seq_sql(self, e, 8), 1624 exp.BoolxorAgg: _boolxor_agg_sql, 1625 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 1626 exp.Initcap: _initcap_sql, 1627 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 1628 exp.SHA: lambda self, e: _sha_sql(self, e, "SHA1"), 1629 exp.SHA1Digest: lambda self, e: _sha_sql(self, e, "SHA1", is_binary=True), 1630 exp.SHA2: lambda self, e: _sha_sql(self, e, "SHA256"), 1631 exp.SHA2Digest: lambda self, e: _sha_sql(self, e, "SHA256", is_binary=True), 1632 exp.MonthsBetween: months_between_sql, 1633 exp.NextDay: _day_navigation_sql, 1634 exp.PercentileCont: rename_func("QUANTILE_CONT"), 1635 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 1636 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
1637 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 1638 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 1639 exp.PreviousDay: _day_navigation_sql, 1640 exp.RegexpILike: lambda self, e: self.func( 1641 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 1642 ), 1643 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 1644 exp.RegrValx: _regr_val_sql, 1645 exp.RegrValy: _regr_val_sql, 1646 exp.Return: lambda self, e: self.sql(e, "this"), 1647 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 1648 exp.StrToUnix: lambda self, e: self.func( 1649 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 1650 ), 1651 exp.Struct: _struct_sql, 1652 exp.Transform: rename_func("LIST_TRANSFORM"), 1653 exp.TimeAdd: _date_delta_to_binary_interval_op(), 1654 exp.TimeSub: _date_delta_to_binary_interval_op(), 1655 exp.Time: no_time_sql, 1656 exp.TimeDiff: _timediff_sql, 1657 exp.Timestamp: no_timestamp_sql, 1658 exp.TimestampAdd: _date_delta_to_binary_interval_op(), 1659 exp.TimestampDiff: lambda self, e: self.func( 1660 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 1661 ), 1662 exp.TimestampSub: _date_delta_to_binary_interval_op(), 1663 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)), 1664 exp.TimeStrToTime: timestrtotime_sql, 1665 exp.TimeStrToUnix: lambda self, e: self.func( 1666 "EPOCH", exp.cast(e.this, exp.DType.TIMESTAMP) 1667 ), 1668 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 1669 exp.ToBoolean: _to_boolean_sql, 1670 exp.ToVariant: lambda self, e: self.sql( 1671 exp.cast(e.this, exp.DataType.from_str("VARIANT", dialect="duckdb")) 1672 ), 1673 exp.TimeToUnix: rename_func("EPOCH"), 1674 exp.TsOrDiToDi: lambda self, e: ( 1675 f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)" 1676 ), 1677 exp.TsOrDsAdd: _date_delta_to_binary_interval_op(), 1678 exp.TsOrDsDiff: lambda self, e: self.func( 1679 "DATE_DIFF", 1680 f"'{e.args.get('unit') or 'DAY'}'", 1681 exp.cast(e.expression, exp.DType.TIMESTAMP), 1682 exp.cast(e.this, exp.DType.TIMESTAMP), 1683 ), 1684 exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)), 1685 exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)), 1686 exp.UnixSeconds: lambda self, e: self.sql( 1687 exp.cast(self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DType.BIGINT) 1688 ), 1689 exp.UnixToStr: lambda self, e: self.func( 1690 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 1691 ), 1692 exp.DatetimeTrunc: lambda self, e: self.func( 1693 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DType.DATETIME) 1694 ), 1695 exp.UnixToTime: _unix_to_time_sql, 1696 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 1697 exp.VariancePop: rename_func("VAR_POP"), 1698 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1699 exp.YearOfWeek: lambda self, e: self.sql( 1700 exp.Extract( 1701 this=exp.Var(this="ISOYEAR"), 1702 expression=e.this, 1703 ) 1704 ), 1705 exp.YearOfWeekIso: lambda self, e: self.sql( 1706 exp.Extract( 1707 this=exp.Var(this="ISOYEAR"), 1708 expression=e.this, 1709 ) 1710 ), 1711 exp.Xor: _xor_sql, 1712 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 1713 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 1714 exp.DateBin: rename_func("TIME_BUCKET"), 1715 exp.LastDay: 
_last_day_sql, 1716 } 1717 1718 SUPPORTED_JSON_PATH_PARTS = { 1719 exp.JSONPathKey, 1720 exp.JSONPathRoot, 1721 exp.JSONPathSubscript, 1722 exp.JSONPathWildcard, 1723 } 1724 1725 TYPE_MAPPING = { 1726 **generator.Generator.TYPE_MAPPING, 1727 exp.DType.BINARY: "BLOB", 1728 exp.DType.BPCHAR: "TEXT", 1729 exp.DType.CHAR: "TEXT", 1730 exp.DType.DATETIME: "TIMESTAMP", 1731 exp.DType.DECFLOAT: "DECIMAL", 1732 exp.DType.FLOAT: "REAL", 1733 exp.DType.JSONB: "JSON", 1734 exp.DType.NCHAR: "TEXT", 1735 exp.DType.NVARCHAR: "TEXT", 1736 exp.DType.UINT: "UINTEGER", 1737 exp.DType.VARBINARY: "BLOB", 1738 exp.DType.ROWVERSION: "BLOB", 1739 exp.DType.VARCHAR: "TEXT", 1740 exp.DType.TIMESTAMPLTZ: "TIMESTAMPTZ", 1741 exp.DType.TIMESTAMPNTZ: "TIMESTAMP", 1742 exp.DType.TIMESTAMP_S: "TIMESTAMP_S", 1743 exp.DType.TIMESTAMP_MS: "TIMESTAMP_MS", 1744 exp.DType.TIMESTAMP_NS: "TIMESTAMP_NS", 1745 exp.DType.BIGDECIMAL: "DECIMAL", 1746 } 1747 1748 TYPE_PARAM_SETTINGS = { 1749 **generator.Generator.TYPE_PARAM_SETTINGS, 1750 exp.DType.BIGDECIMAL: ((38, 5), (38, 38)), 1751 exp.DType.DECFLOAT: ((38, 5), (38, 38)), 1752 } 1753 1754 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 1755 RESERVED_KEYWORDS = { 1756 "array", 1757 "analyse", 1758 "union", 1759 "all", 1760 "when", 1761 "in_p", 1762 "default", 1763 "create_p", 1764 "window", 1765 "asymmetric", 1766 "to", 1767 "else", 1768 "localtime", 1769 "from", 1770 "end_p", 1771 "select", 1772 "current_date", 1773 "foreign", 1774 "with", 1775 "grant", 1776 "session_user", 1777 "or", 1778 "except", 1779 "references", 1780 "fetch", 1781 "limit", 1782 "group_p", 1783 "leading", 1784 "into", 1785 "collate", 1786 "offset", 1787 "do", 1788 "then", 1789 "localtimestamp", 1790 "check_p", 1791 "lateral_p", 1792 "current_role", 1793 "where", 1794 "asc_p", 1795 "placing", 1796 "desc_p", 1797 "user", 1798 "unique", 1799 "initially", 1800 "column", 1801 "both", 1802 "some", 1803 "as", 1804 "any", 1805 "only", 1806 "deferrable", 1807 "null_p", 1808 "current_time", 1809 "true_p", 1810 "table", 1811 "case", 1812 "trailing", 1813 "variadic", 1814 "for", 1815 "on", 1816 "distinct", 1817 "false_p", 1818 "not", 1819 "constraint", 1820 "current_timestamp", 1821 "returning", 1822 "primary", 1823 "intersect", 1824 "having", 1825 "analyze", 1826 "current_user", 1827 "and", 1828 "cast", 1829 "symmetric", 1830 "using", 1831 "order", 1832 "current_catalog", 1833 } 1834 1835 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 1836 1837 # DuckDB doesn't generally support CREATE TABLE .. properties 1838 # https://duckdb.org/docs/sql/statements/create_table.html 1839 # There are a few exceptions (e.g. 
temporary tables) which are supported or 1840 # can be transpiled to DuckDB, so we explicitly override them accordingly 1841 PROPERTIES_LOCATION = { 1842 **{ 1843 prop: exp.Properties.Location.UNSUPPORTED 1844 for prop in generator.Generator.PROPERTIES_LOCATION 1845 }, 1846 exp.LikeProperty: exp.Properties.Location.POST_SCHEMA, 1847 exp.TemporaryProperty: exp.Properties.Location.POST_CREATE, 1848 exp.ReturnsProperty: exp.Properties.Location.POST_ALIAS, 1849 exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION, 1850 exp.IcebergProperty: exp.Properties.Location.POST_CREATE, 1851 } 1852 1853 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: t.ClassVar = _IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS 1854 1855 # Template for ZIPF transpilation - placeholders get replaced with actual parameters 1856 ZIPF_TEMPLATE: exp.Expr = exp.maybe_parse( 1857 """ 1858 WITH rand AS (SELECT :random_expr AS r), 1859 weights AS ( 1860 SELECT i, 1.0 / POWER(i, :s) AS w 1861 FROM RANGE(1, :n + 1) AS t(i) 1862 ), 1863 cdf AS ( 1864 SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p 1865 FROM weights 1866 ) 1867 SELECT MIN(i) 1868 FROM cdf 1869 WHERE p >= (SELECT r FROM rand) 1870 """ 1871 ) 1872 1873 # Template for NORMAL transpilation using Box-Muller transform 1874 # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2)) 1875 NORMAL_TEMPLATE: exp.Expr = exp.maybe_parse( 1876 ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))" 1877 ) 1878 1879 # Template for generating a seeded pseudo-random value in [0, 1) from a hash 1880 SEEDED_RANDOM_TEMPLATE: exp.Expr = exp.maybe_parse("(ABS(HASH(:seed)) % 1000000) / 1000000.0") 1881 1882 # Template for generating signed and unsigned SEQ values within a specified range 1883 SEQ_UNSIGNED: exp.Expr = _SEQ_UNSIGNED 1884 SEQ_SIGNED: exp.Expr = _SEQ_SIGNED 1885 1886 # Template for MAP_CAT transpilation - Snowflake semantics: 1887 # 1. Returns NULL if either input is NULL 1888 # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k])) 1889 # 3. Filters out entries with NULL values from the result 1890 MAPCAT_TEMPLATE: exp.Expr = exp.maybe_parse( 1891 """ 1892 CASE 1893 WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL 1894 ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM( 1895 LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))), 1896 __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k])) 1897 ), __x -> __x.value IS NOT NULL)) 1898 END 1899 """ 1900 ) 1901 1902 # Mappings for EXTRACT/DATE_PART transpilation 1903 # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes 1904 EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = { 1905 "WEEKISO": ("%V", "INTEGER"), 1906 "YEAROFWEEK": ("%G", "INTEGER"), 1907 "YEAROFWEEKISO": ("%G", "INTEGER"), 1908 "NANOSECOND": ("%n", "BIGINT"), 1909 } 1910 1911 # Maps epoch-based specifiers to DuckDB epoch functions 1912 EXTRACT_EPOCH_MAPPINGS: dict[str, str] = { 1913 "EPOCH_SECOND": "EPOCH", 1914 "EPOCH_MILLISECOND": "EPOCH_MS", 1915 "EPOCH_MICROSECOND": "EPOCH_US", 1916 "EPOCH_NANOSECOND": "EPOCH_NS", 1917 } 1918 1919 # Template for BITMAP_CONSTRUCT_AGG transpilation 1920 # 1921 # BACKGROUND: 1922 # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap. 
    # Supports values in the range 0-32767; this version returns NULL if any value is out of range.
    # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
    # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
    #
    # Snowflake uses two different formats based on the number of unique values:
    #
    # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
    #   Bytes 0-1: Count of values as a 2-byte big-endian integer (e.g., 3 values = 0x0003)
    #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
    #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
    #                                count  v1   v2   v3   pad
    #
    # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
    #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
    #   Bytes 10+: Each value as a 2-byte little-endian integer (no padding)
    #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
    #                                  hdr  -----9 zero bytes---- v1   v2   v3   v4   v5
    #
    # TEMPLATE STRUCTURE
    #
    # Phase 1 - Innermost subquery: Data preparation
    #   SELECT LIST_SORT(...) AS l
    #   - Aggregates all input values into a list, removes NULLs and duplicates, and sorts
    #   Result: Clean, sorted list of unique non-null integers stored as 'l'
    #
    # Phase 2 - Middle subquery: Hex string construction
    #   LIST_TRANSFORM(...)
    #   - Converts each integer to its 2-byte little-endian hex representation
    #   - & 255 extracts the low byte, >> 8 extracts the high byte
    #   - LIST_REDUCE: Concatenates all hex pairs into a single string 'h'
    #   Result: Hex string of all values
    #
    # Phase 3 - Outer SELECT: Final bitmap assembly
    #   LENGTH(l) < 5:
    #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
    #   LENGTH(l) >= 5:
    #   - Large format: Fixed 10-byte header + values (no padding needed)
    #   Result: Complete binary bitmap as BLOB
    #
    BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CASE
            WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
            WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
            WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
            ELSE UNHEX('08000000000000000000' || h)
        END
        FROM (
            SELECT l, COALESCE(LIST_REDUCE(
                LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
                (__a, __b) -> __a || __b, ''
            ), '') AS h
            FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
        )
        """
    )

    # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
    RANDSTR_TEMPLATE: exp.Expr = exp.maybe_parse(
        f"""
        SELECT LISTAGG(
            SUBSTRING(
                '{RANDSTR_CHAR_POOL}',
                1 + CAST(FLOOR(random_value * 62) AS INT),
                1
            ),
            ''
        )
        FROM (
            SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
            FROM RANGE(:length) AS t(i)
        )
        """,
    )

    # Template for MINHASH transpilation
    # Computes k minimum hash values across aggregated data using DuckDB list functions
    # Returns JSON matching the Snowflake format: {"state": [...], "type": "minhash", "version": 1}
    MINHASH_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
2004 FROM ( 2005 SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h 2006 FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed) 2007 ) 2008 """, 2009 ) 2010 2011 # Template for MINHASH_COMBINE transpilation 2012 # Combines multiple minhash signatures by taking element-wise minimum 2013 MINHASH_COMBINE_TEMPLATE: exp.Expr = exp.maybe_parse( 2014 """ 2015 SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1) 2016 FROM ( 2017 SELECT 2018 pos AS idx, 2019 MIN(val) AS min_h 2020 FROM 2021 UNNEST(LIST(:expr)) AS _(sig), 2022 UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos) 2023 GROUP BY pos 2024 ) 2025 """, 2026 ) 2027 2028 # Template for APPROXIMATE_SIMILARITY transpilation 2029 # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree 2030 APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expr = exp.maybe_parse( 2031 """ 2032 SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*) 2033 FROM ( 2034 SELECT pos, COUNT(DISTINCT h) AS num_distinct 2035 FROM ( 2036 SELECT h, pos 2037 FROM UNNEST(LIST(:expr)) AS _(sig), 2038 UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos) 2039 ) 2040 GROUP BY pos 2041 ) 2042 """, 2043 ) 2044 2045 # Template for ARRAYS_ZIP transpilation 2046 # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest 2047 # Uses RANGE + indexing to match Snowflake behavior 2048 ARRAYS_ZIP_TEMPLATE: exp.Expr = exp.maybe_parse( 2049 """ 2050 CASE WHEN :null_check THEN NULL 2051 WHEN :all_empty_check THEN [:empty_struct] 2052 ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct) 2053 END 2054 """, 2055 ) 2056 2057 UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse( 2058 """ 2059 (SELECT 2060 LOWER( 2061 SUBSTR(h, 1, 8) || '-' || 2062 SUBSTR(h, 9, 4) || '-' || 2063 '5' || SUBSTR(h, 14, 3) || '-' || 2064 FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' || 2065 SUBSTR(h, 21, 12) 2066 ) 2067 FROM ( 2068 SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h 2069 )) 2070 """ 2071 ) 2072 2073 # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION. 2074 # Each element is paired with its 1-based position via LIST_ZIP, then filtered 2075 # by a comparison operator (supplied via :cond) that determines the operation: 2076 # EXCEPT (>): keep the N-th occurrence only if N > count in arr2 2077 # e.g. [2,2,2] EXCEPT [2,2] -> [2] 2078 # INTERSECTION (<=): keep the N-th occurrence only if N <= count in arr2 2079 # e.g. [2,2,2] INTERSECT [2,2] -> [2,2] 2080 # IS NOT DISTINCT FROM is used for NULL-safe element comparison. 2081 ARRAY_BAG_TEMPLATE: exp.Expr = exp.maybe_parse( 2082 """ 2083 CASE 2084 WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL 2085 ELSE LIST_TRANSFORM( 2086 LIST_FILTER( 2087 LIST_ZIP(:arr1, GENERATE_SERIES(1, LEN(:arr1))), 2088 pair -> :cond 2089 ), 2090 pair -> pair[0] 2091 ) 2092 END 2093 """ 2094 ) 2095 2096 ARRAY_EXCEPT_CONDITION: exp.Expr = exp.maybe_parse( 2097 "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))" 2098 " > LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))" 2099 ) 2100 2101 ARRAY_INTERSECTION_CONDITION: exp.Expr = exp.maybe_parse( 2102 "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))" 2103 " <= LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))" 2104 ) 2105 2106 # Set semantics for ARRAY_EXCEPT. 
Deduplicates arr1 via LIST_DISTINCT, then 2107 # filters out any element that appears at least once in arr2. 2108 # e.g. [1,1,2,3] EXCEPT [1] -> [2,3] 2109 # IS NOT DISTINCT FROM is used for NULL-safe element comparison. 2110 ARRAY_EXCEPT_SET_TEMPLATE: exp.Expr = exp.maybe_parse( 2111 """ 2112 CASE 2113 WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL 2114 ELSE LIST_FILTER( 2115 LIST_DISTINCT(:arr1), 2116 e -> LEN(LIST_FILTER(:arr2, x -> x IS NOT DISTINCT FROM e)) = 0 2117 ) 2118 END 2119 """ 2120 ) 2121 2122 STRTOK_TO_ARRAY_TEMPLATE: exp.Expr = exp.maybe_parse( 2123 """ 2124 CASE WHEN :delimiter IS NULL THEN NULL 2125 ELSE LIST_FILTER( 2126 REGEXP_SPLIT_TO_ARRAY(:string, CASE WHEN :delimiter = '' THEN '.^' ELSE CONCAT('[', :escaped, ']') END), 2127 x -> NOT x = '' 2128 ) END 2129 """ 2130 ) 2131 2132 # Template for STRTOK function transpilation 2133 # 2134 # DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB. 2135 # We may need to adjust this if we want to support transpilation from other dialects 2136 # 2137 # CASE 2138 # -- Snowflake: empty delimiter + empty input string -> NULL 2139 # WHEN delimiter = '' AND input_str = '' THEN NULL 2140 # 2141 # -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return input string if index is 1 2142 # WHEN delimiter = '' AND index = 1 THEN input_str 2143 # 2144 # -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return NULL if index is not 1 2145 # WHEN delimiter = '' THEN NULL 2146 # 2147 # -- Snowflake: negative indices return NULL 2148 # WHEN index < 0 THEN NULL 2149 # 2150 # -- Snowflake: return NULL if any argument is NULL 2151 # WHEN input_str IS NULL OR delimiter IS NULL OR index IS NULL THEN NULL 2152 # 2153 # 2154 # ELSE LIST_FILTER( 2155 # REGEXP_SPLIT_TO_ARRAY( 2156 # input_str, 2157 # CASE 2158 # -- if delimiter is '', we don't want to surround it with '[' and ']' as '[]' is invalid for DuckDB 2159 # WHEN delimiter = '' THEN '' 2160 # 2161 # -- handle problematic regex characters in delimiter with REGEXP_REPLACE 2162 # -- turn delimiter into a regex char set, otherwise DuckDB will match in order, which we don't want 2163 # ELSE '[' || REGEXP_REPLACE(delimiter, problematic_char_set, '\\\1', 'g') || ']' 2164 # END 2165 # ), 2166 # 2167 # -- Snowflake: don't return empty strings 2168 # x -> NOT x = '' 2169 # )[index] 2170 # END 2171 STRTOK_TEMPLATE: exp.Expr = exp.maybe_parse( 2172 """ 2173 CASE 2174 WHEN :delimiter = '' AND :string = '' THEN NULL 2175 WHEN :delimiter = '' AND :part_index = 1 THEN :string 2176 WHEN :delimiter = '' THEN NULL 2177 WHEN :part_index < 0 THEN NULL 2178 WHEN :string IS NULL OR :delimiter IS NULL OR :part_index IS NULL THEN NULL 2179 ELSE :base_func 2180 END 2181 """ 2182 ) 2183 2184 def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str: 2185 cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2)) 2186 return self.sql( 2187 exp.replace_placeholders(self.ARRAY_BAG_TEMPLATE, arr1=arr1, arr2=arr2, cond=cond) 2188 ) 2189 2190 def timeslice_sql(self, expression: exp.TimeSlice) -> str: 2191 """ 2192 Transform Snowflake's TIME_SLICE to DuckDB's time_bucket. 2193 2194 Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END']) 2195 DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr) 2196 2197 For 'END' kind, add the interval to get the end of the slice. 
For DATE type with 'END', cast the result back to DATE to preserve the type.
        """
        date_expr = expression.this
        slice_length = expression.expression
        unit = expression.unit
        kind = expression.text("kind").upper()

        # Create INTERVAL expression: INTERVAL 'N' UNIT
        interval_expr = exp.Interval(this=slice_length, unit=unit)

        # Create the base time_bucket expression
        time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)

        # Check if we need the end of the slice (default is start)
        if kind != "END":
            # For 'START', return time_bucket directly
            return self.sql(time_bucket_expr)

        # For 'END', add the interval to get the end of the slice
        add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())

        # If the input is DATE-typed, cast the result back to DATE to preserve the type;
        # DuckDB converts DATE to TIMESTAMP when adding intervals
        if date_expr.is_type(exp.DType.DATE):
            return self.sql(exp.cast(add_expr, exp.DType.DATE))

        return self.sql(add_expr)

    def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
        """
        Transpile the BITMAP_BUCKET_NUMBER function from Snowflake to its DuckDB equivalent.

        Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
        - Each bucket covers 32,768 values
        - Bucket numbering starts at 1
        - Formula: ((value - 1) // 32768) + 1 for positive values

        For non-positive values (0 and negative), we use value // 32768 to avoid
        producing bucket 0 or positive bucket IDs for negative inputs.
        """
        value = expression.this

        positive_formula = ((value - 1) // 32768) + 1
        non_positive_formula = value // 32768

        # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
        case_expr = (
            exp.case()
            .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
            .else_(non_positive_formula)
        )
        return self.sql(case_expr)

    def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
        """
        Transpile Snowflake's BITMAP_BIT_POSITION to a DuckDB CASE expression.

        Snowflake's BITMAP_BIT_POSITION behavior:
        - For n <= 0: returns ABS(n) % 32768
        - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
        """
        this = expression.this

        return self.sql(
            exp.Mod(
                this=exp.Paren(
                    this=exp.If(
                        this=exp.GT(this=this, expression=exp.Literal.number(0)),
                        true=this - exp.Literal.number(1),
                        false=exp.Abs(this=this),
                    )
                ),
                expression=MAX_BIT_POSITION,
            )
        )
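
    # A quick worked mapping for the two bitmap helpers above (illustrative values,
    # not taken from the source):
    #   BITMAP_BUCKET_NUMBER(1)     -> ((1 - 1) // 32768) + 1     = 1
    #   BITMAP_BUCKET_NUMBER(32769) -> ((32769 - 1) // 32768) + 1 = 2
    #   BITMAP_BIT_POSITION(1)      -> (1 - 1) % 32768            = 0
    #   BITMAP_BIT_POSITION(-5)     -> ABS(-5) % 32768            = 5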

    def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
        """
        Transpile Snowflake's BITMAP_CONSTRUCT_AGG to its DuckDB equivalent.
        Uses a pre-parsed template with placeholders replaced by expression nodes.

        Snowflake bitmap format:
        - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
        - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
        """
        arg = expression.this
        return (
            f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
        )

    def compress_sql(self, expression: exp.Compress) -> str:
        self.unsupported("DuckDB does not support the COMPRESS() function")
        return self.function_fallback_sql(expression)

    def encrypt_sql(self, expression: exp.Encrypt) -> str:
        self.unsupported("ENCRYPT is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decrypt_sql(self, expression: exp.Decrypt) -> str:
        func_name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
        self.unsupported(f"{func_name} is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
        func_name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
        self.unsupported(f"{func_name} is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
        self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def parseurl_sql(self, expression: exp.ParseUrl) -> str:
        self.unsupported("PARSE_URL is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def parseip_sql(self, expression: exp.ParseIp) -> str:
        self.unsupported("PARSE_IP is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decompressstring_sql(self, expression: exp.DecompressString) -> str:
        self.unsupported("DECOMPRESS_STRING is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decompressbinary_sql(self, expression: exp.DecompressBinary) -> str:
        self.unsupported("DECOMPRESS_BINARY is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
        this = expression.this
        expr = expression.expression

        if expression.args.get("case_insensitive"):
            this = exp.Upper(this=this)
            expr = exp.Upper(this=expr)

        result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)

        if expression.args.get("integer_scale"):
            result = exp.cast(result * 100, "INTEGER")

        return self.sql(result)

    def nthvalue_sql(self, expression: exp.NthValue) -> str:
        from_first = expression.args.get("from_first", True)
        if not from_first:
            self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end")

        return self.function_fallback_sql(expression)

    def randstr_sql(self, expression: exp.Randstr) -> str:
        """
        Transpile Snowflake's RANDSTR to a DuckDB equivalent using deterministic, hash-based randomness.
        Uses a pre-parsed template with placeholders replaced by expression nodes.

        RANDSTR(length, generator) generates a random string of the specified length.
2354 - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result) 2355 - With RANDOM(): Use RANDOM() in the hash for non-deterministic output 2356 - No generator: Use default seed value 2357 """ 2358 length = expression.this 2359 generator = expression.args.get("generator") 2360 2361 if generator: 2362 if isinstance(generator, exp.Rand): 2363 # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself 2364 seed_value = generator.this or generator 2365 else: 2366 # Const/int or other expression - use as seed directly 2367 seed_value = generator 2368 else: 2369 # No generator specified, use default seed (arbitrary but deterministic) 2370 seed_value = exp.Literal.number(RANDSTR_SEED) 2371 2372 replacements = {"seed": seed_value, "length": length} 2373 return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})" 2374 2375 @unsupported_args("finish") 2376 def reduce_sql(self, expression: exp.Reduce) -> str: 2377 array_arg = expression.this 2378 initial_value = expression.args.get("initial") 2379 merge_lambda = expression.args.get("merge") 2380 2381 if merge_lambda: 2382 merge_lambda.set("colon", True) 2383 2384 return self.func("list_reduce", array_arg, merge_lambda, initial_value) 2385 2386 def zipf_sql(self, expression: exp.Zipf) -> str: 2387 """ 2388 Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling. 2389 Uses a pre-parsed template with placeholders replaced by expression nodes. 2390 """ 2391 s = expression.this 2392 n = expression.args["elementcount"] 2393 gen = expression.args["gen"] 2394 2395 if not isinstance(gen, exp.Rand): 2396 # (ABS(HASH(seed)) % 1000000) / 1000000.0 2397 random_expr: exp.Expr = exp.Div( 2398 this=exp.Paren( 2399 this=exp.Mod( 2400 this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])), 2401 expression=exp.Literal.number(1000000), 2402 ) 2403 ), 2404 expression=exp.Literal.number(1000000.0), 2405 ) 2406 else: 2407 # Use RANDOM() for non-deterministic output 2408 random_expr = exp.Rand() 2409 2410 replacements = {"s": s, "n": n, "random_expr": random_expr} 2411 return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})" 2412 2413 def tobinary_sql(self, expression: exp.ToBinary) -> str: 2414 """ 2415 TO_BINARY and TRY_TO_BINARY transpilation: 2416 - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50') 2417 - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST') 2418 - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==') 2419 2420 For TRY_TO_BINARY (safe=True), wrap with TRY(): 2421 - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid')) 2422 """ 2423 value = expression.this 2424 format_arg = expression.args.get("format") 2425 is_safe = expression.args.get("safe") 2426 is_binary = _is_binary(expression) 2427 2428 if not format_arg and not is_binary: 2429 func_name = "TRY_TO_BINARY" if is_safe else "TO_BINARY" 2430 return self.func(func_name, value) 2431 2432 # Snowflake defaults to HEX encoding when no format is specified 2433 fmt = format_arg.name.upper() if format_arg else "HEX" 2434 2435 if fmt in ("UTF-8", "UTF8"): 2436 # DuckDB ENCODE always uses UTF-8, no charset parameter needed 2437 result = self.func("ENCODE", value) 2438 elif fmt == "BASE64": 2439 result = self.func("FROM_BASE64", value) 2440 elif fmt == "HEX": 2441 result = self.func("UNHEX", value) 2442 else: 2443 if is_safe: 2444 return self.sql(exp.null()) 2445 else: 2446 self.unsupported(f"format {fmt} is not supported") 2447 result = 
self.func("TO_BINARY", value) 2448 return f"TRY({result})" if is_safe else result 2449 2450 def tonumber_sql(self, expression: exp.ToNumber) -> str: 2451 fmt = expression.args.get("format") 2452 precision = expression.args.get("precision") 2453 scale = expression.args.get("scale") 2454 2455 if not fmt and precision and scale: 2456 return self.sql( 2457 exp.cast( 2458 expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb" 2459 ) 2460 ) 2461 2462 return super().tonumber_sql(expression) 2463 2464 def _greatest_least_sql(self, expression: exp.Greatest | exp.Least) -> str: 2465 """ 2466 Handle GREATEST/LEAST functions with dialect-aware NULL behavior. 2467 2468 - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL 2469 - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value 2470 """ 2471 # Get all arguments 2472 all_args = [expression.this, *expression.expressions] 2473 fallback_sql = self.function_fallback_sql(expression) 2474 2475 if expression.args.get("ignore_nulls"): 2476 # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs) 2477 return self.sql(fallback_sql) 2478 2479 # return NULL if any argument is NULL 2480 case_expr = exp.case().when( 2481 exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False), 2482 exp.null(), 2483 copy=False, 2484 ) 2485 case_expr.set("default", fallback_sql) 2486 return self.sql(case_expr) 2487 2488 def generator_sql(self, expression: exp.Generator) -> str: 2489 # Transpile Snowflake GENERATOR to DuckDB range() 2490 rowcount = expression.args.get("rowcount") 2491 time_limit = expression.args.get("time_limit") 2492 2493 if time_limit: 2494 self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB") 2495 2496 if not rowcount: 2497 self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB") 2498 return self.func("range", exp.Literal.number(0)) 2499 2500 return self.func("range", rowcount) 2501 2502 def greatest_sql(self, expression: exp.Greatest) -> str: 2503 return self._greatest_least_sql(expression) 2504 2505 def least_sql(self, expression: exp.Least) -> str: 2506 return self._greatest_least_sql(expression) 2507 2508 def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str: 2509 if expression.args.get("colon"): 2510 prefix = "LAMBDA " 2511 arrow_sep = ":" 2512 wrap = False 2513 else: 2514 prefix = "" 2515 2516 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 2517 return f"{prefix}{lambda_sql}" 2518 2519 def show_sql(self, expression: exp.Show) -> str: 2520 from_ = self.sql(expression, "from_") 2521 from_ = f" FROM {from_}" if from_ else "" 2522 return f"SHOW {expression.name}{from_}" 2523 2524 def soundex_sql(self, expression: exp.Soundex) -> str: 2525 self.unsupported("SOUNDEX is not supported in DuckDB") 2526 return self.func("SOUNDEX", expression.this) 2527 2528 def sortarray_sql(self, expression: exp.SortArray) -> str: 2529 arr = expression.this 2530 asc = expression.args.get("asc") 2531 nulls_first = expression.args.get("nulls_first") 2532 2533 if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean): 2534 return self.func("LIST_SORT", arr, asc, nulls_first) 2535 2536 nulls_are_first = nulls_first == exp.true() 2537 nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None 2538 2539 if not isinstance(asc, exp.Boolean): 2540 return self.func("LIST_SORT", arr, asc, nulls_first_sql) 2541 2542 descending = 
asc == exp.false()

        if not descending and not nulls_are_first:
            return self.func("LIST_SORT", arr)
        if not nulls_are_first:
            return self.func("ARRAY_REVERSE_SORT", arr)
        return self.func(
            "LIST_SORT",
            arr,
            exp.Literal.string("DESC" if descending else "ASC"),
            exp.Literal.string("NULLS FIRST"),
        )

    def install_sql(self, expression: exp.Install) -> str:
        force = "FORCE " if expression.args.get("force") else ""
        this = self.sql(expression, "this")
        from_clause = expression.args.get("from_")
        from_clause = f" FROM {from_clause}" if from_clause else ""
        return f"{force}INSTALL {this}{from_clause}"

    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
        self.unsupported(
            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types"
        )
        return self.function_fallback_sql(expression)

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))

    def strposition_sql(self, expression: exp.StrPosition) -> str:
        this = expression.this
        substr = expression.args.get("substr")
        position = expression.args.get("position")

        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types.
        # Convert to HEX strings, use STRPOS, then convert the hex position to a byte position.
        if _is_binary(this):
            # Build expression: STRPOS(HEX(haystack), HEX(needle))
            hex_strpos = exp.StrPosition(
                this=exp.Hex(this=this),
                substr=exp.Hex(this=substr),
            )

            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

        # For VARCHAR: handle clamp_position
        if expression.args.get("clamp_position") and position:
            expression = expression.copy()
            expression.set(
                "position",
                exp.If(
                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                    true=exp.Literal.number(1),
                    false=position.copy(),
                ),
            )

        return strposition_sql(self, expression)

    def substring_sql(self, expression: exp.Substring) -> str:
        if expression.args.get("zero_start"):
            # The walrus assignments below leave start/length as None when absent
            if start := expression.args.get("start"):
                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
            if length := expression.args.get("length"):
                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)

            return self.func("SUBSTRING", expression.this, start, length)

        return self.function_fallback_sql(expression)

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DType.TIMESTAMPLTZ,
            exp.DType.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
                )
            )
        return base_sql

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
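        # A rough transpilation sketch (illustrative input; format-code mapping assumed):
        #   Snowflake TO_DATE('2020-01-15', 'YYYY-MM-DD')
        #   -> CAST(STRPTIME('2020-01-15', '%Y-%m-%d') AS DATE)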
formatted_time = self.format_time(expression) 2642 function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME" 2643 return self.sql( 2644 exp.cast( 2645 self.func(function_name, expression.this, formatted_time), 2646 exp.DataType(this=exp.DType.DATE), 2647 ) 2648 ) 2649 2650 def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str: 2651 this = expression.this 2652 time_format = self.format_time(expression) 2653 safe = expression.args.get("safe") 2654 time_type = exp.DataType.from_str("TIME", dialect="duckdb") 2655 cast_expr = exp.TryCast if safe else exp.Cast 2656 2657 if time_format: 2658 func_name = "TRY_STRPTIME" if safe else "STRPTIME" 2659 strptime = exp.Anonymous(this=func_name, expressions=[this, time_format]) 2660 return self.sql(cast_expr(this=strptime, to=time_type)) 2661 2662 if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME): 2663 return self.sql(this) 2664 2665 return self.sql(cast_expr(this=this, to=time_type)) 2666 2667 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2668 if not expression.this: 2669 return "CURRENT_DATE" 2670 2671 expr = exp.Cast( 2672 this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this), 2673 to=exp.DataType(this=exp.DType.DATE), 2674 ) 2675 return self.sql(expr) 2676 2677 def checkjson_sql(self, expression: exp.CheckJson) -> str: 2678 arg = expression.this 2679 return self.sql( 2680 exp.case() 2681 .when( 2682 exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)), 2683 exp.null(), 2684 ) 2685 .else_(exp.Literal.string("Invalid JSON")) 2686 ) 2687 2688 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 2689 arg = expression.this 2690 if expression.args.get("safe"): 2691 return self.sql( 2692 exp.case() 2693 .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON")) 2694 .else_(exp.null()) 2695 ) 2696 return self.func("JSON", arg) 2697 2698 def unicode_sql(self, expression: exp.Unicode) -> str: 2699 if expression.args.get("empty_is_zero"): 2700 return self.sql( 2701 exp.case() 2702 .when(expression.this.eq(exp.Literal.string("")), exp.Literal.number(0)) 2703 .else_(exp.Anonymous(this="UNICODE", expressions=[expression.this])) 2704 ) 2705 2706 return self.func("UNICODE", expression.this) 2707 2708 def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str: 2709 return self.sql( 2710 exp.case() 2711 .when(exp.func("json_type", expression.this).eq("NULL"), exp.null()) 2712 .else_(expression.this) 2713 ) 2714 2715 def trunc_sql(self, expression: exp.Trunc) -> str: 2716 decimals = expression.args.get("decimals") 2717 if ( 2718 expression.args.get("fractions_supported") 2719 and decimals 2720 and not decimals.is_type(exp.DType.INT) 2721 ): 2722 decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb") 2723 2724 return self.func("TRUNC", expression.this, decimals) 2725 2726 def normal_sql(self, expression: exp.Normal) -> str: 2727 """ 2728 Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB. 2729 2730 Uses the Box-Muller transform via NORMAL_TEMPLATE. 
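
        Sketch of the generated expression (u1 and u2 are uniform draws in [0, 1);
        the template clamps u1 with GREATEST(u1, 1e-10) to avoid LN(0)):
            mean + stddev * SQRT(-2 * LN(u1)) * COS(2 * PI() * u2)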
2731 """ 2732 mean = expression.this 2733 stddev = expression.args["stddev"] 2734 gen: exp.Expr = expression.args["gen"] 2735 2736 # Build two uniform random values [0, 1) for Box-Muller transform 2737 if isinstance(gen, exp.Rand) and gen.this is None: 2738 u1: exp.Expr = exp.Rand() 2739 u2: exp.Expr = exp.Rand() 2740 else: 2741 # Seeded: derive two values using HASH with different inputs 2742 seed = gen.this if isinstance(gen, exp.Rand) else gen 2743 u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed) 2744 u2 = exp.replace_placeholders( 2745 self.SEEDED_RANDOM_TEMPLATE, 2746 seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)), 2747 ) 2748 2749 replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2} 2750 return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements)) 2751 2752 def uniform_sql(self, expression: exp.Uniform) -> str: 2753 """ 2754 Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB. 2755 2756 UNIFORM returns a random value in [min, max]: 2757 - Integer result if both min and max are integers 2758 - Float result if either min or max is a float 2759 """ 2760 min_val = expression.this 2761 max_val = expression.expression 2762 gen = expression.args.get("gen") 2763 2764 # Determine if result should be integer (both bounds are integers). 2765 # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT 2766 is_int_result = min_val.is_int and max_val.is_int 2767 2768 # Build the random value expression [0, 1) 2769 if not isinstance(gen, exp.Rand): 2770 # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0 2771 random_expr: exp.Expr = exp.Div( 2772 this=exp.Paren( 2773 this=exp.Mod( 2774 this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])), 2775 expression=exp.Literal.number(1000000), 2776 ) 2777 ), 2778 expression=exp.Literal.number(1000000.0), 2779 ) 2780 else: 2781 random_expr = exp.Rand() 2782 2783 # Build: min + random * (max - min [+ 1 for int]) 2784 range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val) 2785 if is_int_result: 2786 range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1)) 2787 2788 result: exp.Expr = exp.Add( 2789 this=min_val, 2790 expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)), 2791 ) 2792 2793 if is_int_result: 2794 result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr()) 2795 2796 return self.sql(result) 2797 2798 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 2799 nano = expression.args.get("nano") 2800 overflow = expression.args.get("overflow") 2801 2802 # Snowflake's TIME_FROM_PARTS supports overflow 2803 if overflow: 2804 hour = expression.args["hour"] 2805 minute = expression.args["min"] 2806 sec = expression.args["sec"] 2807 2808 # Check if values are within normal ranges - use MAKE_TIME for efficiency 2809 if not nano and all(arg.is_int for arg in [hour, minute, sec]): 2810 try: 2811 h_val = hour.to_py() 2812 m_val = minute.to_py() 2813 s_val = sec.to_py() 2814 if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59: 2815 return rename_func("MAKE_TIME")(self, expression) 2816 except ValueError: 2817 pass 2818 2819 # Overflow or nanoseconds detected - use INTERVAL arithmetic 2820 if nano: 2821 sec = sec + nano.pop() / exp.Literal.number(1000000000.0) 2822 2823 total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec 2824 2825 return self.sql( 2826 exp.Add( 2827 this=exp.Cast( 2828 this=exp.Literal.string("00:00:00"), 
to=exp.DType.TIME.into_expr() 2829 ), 2830 expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")), 2831 ) 2832 ) 2833 2834 # Default: MAKE_TIME 2835 if nano: 2836 expression.set( 2837 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 2838 ) 2839 2840 return rename_func("MAKE_TIME")(self, expression) 2841 2842 def extract_sql(self, expression: exp.Extract) -> str: 2843 """ 2844 Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported. 2845 2846 DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND, 2847 EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND 2848 """ 2849 this = expression.this 2850 datetime_expr = expression.expression 2851 2852 # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB 2853 # because Snowflake applies server timezone while DuckDB uses local timezone 2854 if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ): 2855 self.unsupported( 2856 "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences" 2857 ) 2858 2859 part_name = this.name.upper() 2860 2861 if part_name in self.EXTRACT_STRFTIME_MAPPINGS: 2862 fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name] 2863 2864 # Problem: strftime doesn't accept TIME and there's no NANOSECOND function 2865 # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000 2866 is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type( 2867 exp.DType.TIME, exp.DType.TIMETZ 2868 ) 2869 2870 if is_nano_time: 2871 self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB") 2872 return self.sql( 2873 exp.cast( 2874 exp.Mul( 2875 this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr), 2876 expression=exp.Literal.number(1000), 2877 ), 2878 exp.DataType.from_str(cast_type, dialect="duckdb"), 2879 ) 2880 ) 2881 2882 # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision 2883 strftime_input = datetime_expr 2884 if part_name == "NANOSECOND": 2885 strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS) 2886 2887 return self.sql( 2888 exp.cast( 2889 exp.Anonymous( 2890 this="STRFTIME", 2891 expressions=[strftime_input, exp.Literal.string(fmt)], 2892 ), 2893 exp.DataType.from_str(cast_type, dialect="duckdb"), 2894 ) 2895 ) 2896 2897 if part_name in self.EXTRACT_EPOCH_MAPPINGS: 2898 func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name] 2899 result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr]) 2900 # EPOCH returns float, cast to BIGINT for integer result 2901 if part_name == "EPOCH_SECOND": 2902 result = exp.cast(result, exp.DataType.from_str("BIGINT", dialect="duckdb")) 2903 return self.sql(result) 2904 2905 return super().extract_sql(expression) 2906 2907 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 2908 # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr) 2909 date_expr = expression.this 2910 time_expr = expression.expression 2911 2912 if date_expr is not None and time_expr is not None: 2913 # In DuckDB, DATE + TIME produces TIMESTAMP 2914 return self.sql(exp.Add(this=date_expr, expression=time_expr)) 2915 2916 # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...) 
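        # Illustrative example for the component form (assumed input, output approximate):
        #   TIMESTAMP_FROM_PARTS(2020, 1, 15, 10, 30, 0) -> MAKE_TIMESTAMP(2020, 1, 15, 10, 30, 0)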
2917 sec = expression.args.get("sec") 2918 if sec is None: 2919 # This shouldn't happen with valid input, but handle gracefully 2920 return rename_func("MAKE_TIMESTAMP")(self, expression) 2921 2922 milli = expression.args.get("milli") 2923 if milli is not None: 2924 sec += milli.pop() / exp.Literal.number(1000.0) 2925 2926 nano = expression.args.get("nano") 2927 if nano is not None: 2928 sec += nano.pop() / exp.Literal.number(1000000000.0) 2929 2930 if milli or nano: 2931 expression.set("sec", sec) 2932 2933 return rename_func("MAKE_TIMESTAMP")(self, expression) 2934 2935 @unsupported_args("nano") 2936 def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str: 2937 # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts 2938 if nano := expression.args.get("nano"): 2939 nano.pop() 2940 2941 timestamp = rename_func("MAKE_TIMESTAMP")(self, expression) 2942 return f"CAST({timestamp} AS TIMESTAMPTZ)" 2943 2944 @unsupported_args("nano") 2945 def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str: 2946 # Extract zone before popping 2947 zone = expression.args.get("zone") 2948 # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts 2949 if zone: 2950 zone = zone.pop() 2951 2952 if nano := expression.args.get("nano"): 2953 nano.pop() 2954 2955 timestamp = rename_func("MAKE_TIMESTAMP")(self, expression) 2956 2957 if zone: 2958 # Use AT TIME ZONE to apply the explicit timezone 2959 return f"{timestamp} AT TIME ZONE {self.sql(zone)}" 2960 2961 return timestamp 2962 2963 def tablesample_sql( 2964 self, 2965 expression: exp.TableSample, 2966 tablesample_keyword: str | None = None, 2967 ) -> str: 2968 if not isinstance(expression.parent, exp.Select): 2969 # This sample clause only applies to a single source, not the entire resulting relation 2970 tablesample_keyword = "TABLESAMPLE" 2971 2972 if expression.args.get("size"): 2973 method = expression.args.get("method") 2974 if method and method.name.upper() != "RESERVOIR": 2975 self.unsupported( 2976 f"Sampling method {method} is not supported with a discrete sample count, " 2977 "defaulting to reservoir sampling" 2978 ) 2979 expression.set("method", exp.var("RESERVOIR")) 2980 2981 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 2982 2983 def join_sql(self, expression: exp.Join) -> str: 2984 if ( 2985 not expression.args.get("using") 2986 and not expression.args.get("on") 2987 and not expression.method 2988 and (expression.kind in ("", "INNER", "OUTER")) 2989 ): 2990 # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause 2991 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 2992 if isinstance(expression.this, exp.Unnest): 2993 return super().join_sql(expression.on(exp.true())) 2994 2995 expression.set("side", None) 2996 expression.set("kind", None) 2997 2998 return super().join_sql(expression) 2999 3000 def countif_sql(self, expression: exp.CountIf) -> str: 3001 if self.dialect.version >= (1, 2): 3002 return self.function_fallback_sql(expression) 3003 3004 # https://github.com/tobymao/sqlglot/pull/4749 3005 return count_if_to_sum(self, expression) 3006 3007 def bracket_sql(self, expression: exp.Bracket) -> str: 3008 if self.dialect.version >= (1, 2): 3009 return super().bracket_sql(expression) 3010 3011 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 3012 this = expression.this 3013 if isinstance(this, exp.Array): 3014 this.replace(exp.paren(this)) 3015 
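        # e.g. for DuckDB < 1.2 an array literal must be parenthesized before indexing
        # (illustrative): [1, 2, 3][1] -> ([1, 2, 3])[1]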
3016 bracket = super().bracket_sql(expression) 3017 3018 if not expression.args.get("returns_list_for_maps"): 3019 if not this.type: 3020 from sqlglot.optimizer.annotate_types import annotate_types 3021 3022 this = annotate_types(this, dialect=self.dialect) 3023 3024 if this.is_type(exp.DType.MAP): 3025 bracket = f"({bracket})[1]" 3026 3027 return bracket 3028 3029 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 3030 func = expression.this 3031 3032 # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP 3033 # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y) 3034 if isinstance(func, exp.ArrayAgg): 3035 if not isinstance(order := expression.expression, exp.Order): 3036 return self.sql(func) 3037 3038 # Save the original column for FILTER clause (before wrapping with Order) 3039 original_this = func.this 3040 3041 # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order 3042 # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions) 3043 func.set( 3044 "this", 3045 exp.Order( 3046 this=func.this.copy(), 3047 expressions=order.expressions, 3048 ), 3049 ) 3050 3051 # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed 3052 # Use original_this (not the Order-wrapped version) for the FILTER condition 3053 array_agg_sql = self.function_fallback_sql(func) 3054 return self._add_arrayagg_null_filter(array_agg_sql, func, original_this) 3055 3056 # For other functions (like PERCENTILES), use existing logic 3057 expression_sql = self.sql(expression, "expression") 3058 3059 if isinstance(func, exp.PERCENTILES): 3060 # Make the order key the first arg and slide the fraction to the right 3061 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 3062 order_col = expression.find(exp.Ordered) 3063 if order_col: 3064 func.set("expression", func.this) 3065 func.set("this", order_col.this) 3066 3067 this = self.sql(expression, "this").rstrip(")") 3068 3069 return f"{this}{expression_sql})" 3070 3071 def length_sql(self, expression: exp.Length) -> str: 3072 arg = expression.this 3073 3074 # Dialects like BQ and Snowflake also accept binary values as args, so 3075 # DDB will attempt to infer the type or resort to case/when resolution 3076 if not expression.args.get("binary") or arg.is_string: 3077 return self.func("LENGTH", arg) 3078 3079 if not arg.type: 3080 from sqlglot.optimizer.annotate_types import annotate_types 3081 3082 arg = annotate_types(arg, dialect=self.dialect) 3083 3084 if arg.is_type(*exp.DataType.TEXT_TYPES): 3085 return self.func("LENGTH", arg) 3086 3087 # We need these casts to make duckdb's static type checker happy 3088 blob = exp.cast(arg, exp.DType.VARBINARY) 3089 varchar = exp.cast(arg, exp.DType.VARCHAR) 3090 3091 case = ( 3092 exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg])) 3093 .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob)) 3094 .else_(exp.Anonymous(this="LENGTH", expressions=[varchar])) 3095 ) 3096 return self.sql(case) 3097 3098 def bitlength_sql(self, expression: exp.BitLength) -> str: 3099 if not _is_binary(arg := expression.this): 3100 return self.func("BIT_LENGTH", arg) 3101 3102 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 3103 return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8)) 3104 3105 def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str: 3106 arg = expression.expressions[0] 3107 if arg.is_type(*exp.DataType.REAL_TYPES): 3108 arg = exp.cast(arg, exp.DType.INT) 3109 
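        # DuckDB's CHR expects an integer code point, so REAL inputs are first cast to
        # INT, e.g. CHR(65.0) -> CHR(CAST(65.0 AS INT)) (illustrative)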
return self.func("CHR", arg) 3110 3111 def collation_sql(self, expression: exp.Collation) -> str: 3112 self.unsupported("COLLATION function is not supported by DuckDB") 3113 return self.function_fallback_sql(expression) 3114 3115 def collate_sql(self, expression: exp.Collate) -> str: 3116 if not expression.expression.is_string: 3117 return super().collate_sql(expression) 3118 3119 raw = expression.expression.name 3120 if not raw: 3121 return self.sql(expression.this) 3122 3123 parts = [] 3124 for part in raw.split("-"): 3125 lower = part.lower() 3126 if lower not in _SNOWFLAKE_COLLATION_DEFAULTS: 3127 if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED: 3128 self.unsupported( 3129 f"Snowflake collation specifier '{part}' has no DuckDB equivalent" 3130 ) 3131 parts.append(lower) 3132 3133 if not parts: 3134 return self.sql(expression.this) 3135 return super().collate_sql( 3136 exp.Collate(this=expression.this, expression=exp.var(".".join(parts))) 3137 ) 3138 3139 def _validate_regexp_flags(self, flags: exp.Expr | None, supported_flags: str) -> str | None: 3140 """ 3141 Validate and filter regexp flags for DuckDB compatibility. 3142 3143 Args: 3144 flags: The flags expression to validate 3145 supported_flags: String of supported flags (e.g., "ims", "cims"). 3146 Only these flags will be returned. 3147 3148 Returns: 3149 Validated/filtered flag string, or None if no valid flags remain 3150 """ 3151 if not isinstance(flags, exp.Expr): 3152 return None 3153 3154 if not flags.is_string: 3155 self.unsupported("Non-literal regexp flags are not fully supported in DuckDB") 3156 return None 3157 3158 flag_str = flags.this 3159 unsupported = set(flag_str) - set(supported_flags) 3160 3161 if unsupported: 3162 self.unsupported( 3163 f"Regexp flags {sorted(unsupported)} are not supported in this context" 3164 ) 3165 3166 flag_str = "".join(f for f in flag_str if f in supported_flags) 3167 return flag_str if flag_str else None 3168 3169 def regexpcount_sql(self, expression: exp.RegexpCount) -> str: 3170 this = expression.this 3171 pattern = expression.expression 3172 position = expression.args.get("position") 3173 parameters = expression.args.get("parameters") 3174 3175 # Validate flags - only "ims" flags are supported for embedded patterns 3176 validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims") 3177 3178 if position: 3179 this = exp.Substring(this=this, start=position) 3180 3181 # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument) 3182 if validated_flags: 3183 pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern]) 3184 3185 # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character 3186 result = ( 3187 exp.case() 3188 .when( 3189 exp.EQ(this=pattern, expression=exp.Literal.string("")), 3190 exp.Literal.number(0), 3191 ) 3192 .else_( 3193 exp.Length( 3194 this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]) 3195 ) 3196 ) 3197 ) 3198 3199 return self.sql(result) 3200 3201 def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str: 3202 subject = expression.this 3203 pattern = expression.expression 3204 replacement = expression.args.get("replacement") or exp.Literal.string("") 3205 position = expression.args.get("position") 3206 occurrence = expression.args.get("occurrence") 3207 modifiers = expression.args.get("modifiers") 3208 3209 validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or "" 3210 3211 # Handle occurrence (only literals 
supported) 3212 if occurrence and not occurrence.is_int: 3213 self.unsupported("REGEXP_REPLACE with non-literal occurrence") 3214 else: 3215 occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0 3216 if occurrence > 1: 3217 self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported") 3218 # flag duckdb to do either all or none, single_replace check is for duckdb round trip 3219 elif ( 3220 occurrence == 0 3221 and "g" not in validated_flags 3222 and not expression.args.get("single_replace") 3223 ): 3224 validated_flags += "g" 3225 3226 # Handle position (only literals supported) 3227 prefix = None 3228 if position and not position.is_int: 3229 self.unsupported("REGEXP_REPLACE with non-literal position") 3230 elif position and position.is_int and position.to_py() > 1: 3231 pos = position.to_py() 3232 prefix = exp.Substring( 3233 this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1) 3234 ) 3235 subject = exp.Substring(this=subject, start=exp.Literal.number(pos)) 3236 3237 result: exp.Expr = exp.Anonymous( 3238 this="REGEXP_REPLACE", 3239 expressions=[ 3240 subject, 3241 pattern, 3242 replacement, 3243 exp.Literal.string(validated_flags) if validated_flags else None, 3244 ], 3245 ) 3246 3247 if prefix: 3248 result = exp.Concat(expressions=[prefix, result]) 3249 3250 return self.sql(result) 3251 3252 def regexplike_sql(self, expression: exp.RegexpLike) -> str: 3253 this = expression.this 3254 pattern = expression.expression 3255 flag = expression.args.get("flag") 3256 3257 if expression.args.get("full_match"): 3258 validated_flags = self._validate_regexp_flags(flag, supported_flags="cims") 3259 flag = exp.Literal.string(validated_flags) if validated_flags else None 3260 return self.func("REGEXP_FULL_MATCH", this, pattern, flag) 3261 3262 return self.func("REGEXP_MATCHES", this, pattern, flag) 3263 3264 @unsupported_args("ins_cost", "del_cost", "sub_cost") 3265 def levenshtein_sql(self, expression: exp.Levenshtein) -> str: 3266 this = expression.this 3267 expr = expression.expression 3268 max_dist = expression.args.get("max_dist") 3269 3270 if max_dist is None: 3271 return self.func("LEVENSHTEIN", this, expr) 3272 3273 # Emulate Snowflake semantics: if distance > max_dist, return max_dist 3274 levenshtein = exp.Levenshtein(this=this, expression=expr) 3275 return self.sql(exp.Least(this=levenshtein, expressions=[max_dist])) 3276 3277 def pad_sql(self, expression: exp.Pad) -> str: 3278 """ 3279 Handle RPAD/LPAD for VARCHAR and BINARY types. 
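        (Illustrative sketch: for BINARY input, LPAD(x, 4, f) lowers to
        REPEAT(f, GREATEST(0, 4 - OCTET_LENGTH(x))) || x.)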
3280 3281 For VARCHAR: Delegate to parent class 3282 For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input))) 3283 """ 3284 string_arg = expression.this 3285 fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ") 3286 3287 if _is_binary(string_arg) or _is_binary(fill_arg): 3288 length_arg = expression.expression 3289 is_left = expression.args.get("is_left") 3290 3291 input_len = exp.ByteLength(this=string_arg) 3292 chars_needed = length_arg - input_len 3293 pad_count = exp.Greatest( 3294 this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True 3295 ) 3296 repeat_expr = exp.Repeat(this=fill_arg, times=pad_count) 3297 3298 left, right = string_arg, repeat_expr 3299 if is_left: 3300 left, right = right, left 3301 3302 result = exp.DPipe(this=left, expression=right) 3303 return self.sql(result) 3304 3305 # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED) 3306 return super().pad_sql(expression) 3307 3308 def minhash_sql(self, expression: exp.Minhash) -> str: 3309 k = expression.this 3310 exprs = expression.expressions 3311 3312 if len(exprs) != 1 or isinstance(exprs[0], exp.Star): 3313 self.unsupported( 3314 "MINHASH with multiple expressions or * requires manual query restructuring" 3315 ) 3316 return self.func("MINHASH", k, *exprs) 3317 3318 expr = exprs[0] 3319 result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k) 3320 return f"({self.sql(result)})" 3321 3322 def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str: 3323 expr = expression.this 3324 result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr) 3325 return f"({self.sql(result)})" 3326 3327 def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str: 3328 expr = expression.this 3329 result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr) 3330 return f"({self.sql(result)})" 3331 3332 def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str: 3333 return self.sql( 3334 exp.Filter( 3335 this=exp.func("LIST", exp.Distinct(expressions=[expression.this])), 3336 expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()), 3337 ) 3338 ) 3339 3340 def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str: 3341 self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB") 3342 return self.function_fallback_sql(expression) 3343 3344 def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str: 3345 arr = expression.this 3346 func = self.func("LIST_DISTINCT", arr) 3347 3348 if expression.args.get("check_null"): 3349 add_null_to_array = exp.func( 3350 "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null() 3351 ) 3352 return self.sql( 3353 exp.If( 3354 this=exp.NEQ( 3355 this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr) 3356 ), 3357 true=add_null_to_array, 3358 false=func, 3359 ) 3360 ) 3361 3362 return func 3363 3364 def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str: 3365 if expression.args.get("is_multiset") and len(expression.expressions) == 2: 3366 return self._array_bag_sql( 3367 self.ARRAY_INTERSECTION_CONDITION, 3368 expression.expressions[0], 3369 expression.expressions[1], 3370 ) 3371 return self.function_fallback_sql(expression) 3372 3373 def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str: 3374 arr1, arr2 = expression.this, expression.expression 3375 if expression.args.get("is_multiset"): 3376 return 
self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2) 3377 return self.sql( 3378 exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2) 3379 ) 3380 3381 def arrayslice_sql(self, expression: exp.ArraySlice) -> str: 3382 """ 3383 Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's 3384 ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE 3385 expressions that adjust the index at query time: 3386 - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END 3387 - end: CASE WHEN end < 0 THEN end - 1 ELSE end END 3388 """ 3389 start, end = expression.args.get("start"), expression.args.get("end") 3390 3391 if expression.args.get("zero_based"): 3392 if start is not None: 3393 start = ( 3394 exp.case() 3395 .when( 3396 exp.GTE(this=start.copy(), expression=exp.Literal.number(0)), 3397 exp.Add(this=start.copy(), expression=exp.Literal.number(1)), 3398 ) 3399 .else_(start) 3400 ) 3401 if end is not None: 3402 end = ( 3403 exp.case() 3404 .when( 3405 exp.LT(this=end.copy(), expression=exp.Literal.number(0)), 3406 exp.Sub(this=end.copy(), expression=exp.Literal.number(1)), 3407 ) 3408 .else_(end) 3409 ) 3410 3411 return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step")) 3412 3413 def arrayszip_sql(self, expression: exp.ArraysZip) -> str: 3414 args = expression.expressions 3415 3416 if not args: 3417 # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs 3418 return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array()))) 3419 3420 # Build placeholder values for template 3421 lengths = [exp.Length(this=arg) for arg in args] 3422 max_len = ( 3423 lengths[0] 3424 if len(lengths) == 1 3425 else exp.Greatest(this=lengths[0], expressions=lengths[1:]) 3426 ) 3427 3428 # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...} 3429 empty_struct = exp.func( 3430 "STRUCT", 3431 *[ 3432 exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null()) 3433 for i in range(len(args)) 3434 ], 3435 ) 3436 3437 # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...} 3438 # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax 3439 index = exp.column("__i") + 1 3440 transform_struct = exp.func( 3441 "STRUCT", 3442 *[ 3443 exp.PropertyEQ( 3444 this=exp.Literal.string(f"${i + 1}"), 3445 expression=exp.func("COALESCE", arg, exp.array())[index], 3446 ) 3447 for i, arg in enumerate(args) 3448 ], 3449 ) 3450 3451 result = exp.replace_placeholders( 3452 self.ARRAYS_ZIP_TEMPLATE.copy(), 3453 null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]), 3454 all_empty_check=exp.and_( 3455 *[ 3456 exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0)) 3457 for arg in args 3458 ] 3459 ), 3460 empty_struct=empty_struct, 3461 max_len=max_len, 3462 transform_struct=transform_struct, 3463 ) 3464 return self.sql(result) 3465 3466 def lower_sql(self, expression: exp.Lower) -> str: 3467 result_sql = self.func("LOWER", _cast_to_varchar(expression.this)) 3468 return _gen_with_cast_to_blob(self, expression, result_sql) 3469 3470 def upper_sql(self, expression: exp.Upper) -> str: 3471 result_sql = self.func("UPPER", _cast_to_varchar(expression.this)) 3472 return _gen_with_cast_to_blob(self, expression, result_sql) 3473 3474 def reverse_sql(self, expression: exp.Reverse) -> str: 3475 result_sql = self.func("REVERSE", _cast_to_varchar(expression.this)) 3476 return _gen_with_cast_to_blob(self, expression, result_sql) 3477 3478 def _left_right_sql(self, 
expression: exp.Left | exp.Right, func_name: str) -> str: 3479 arg = expression.this 3480 length = expression.expression 3481 is_binary = _is_binary(arg) 3482 3483 if is_binary: 3484 # LEFT/RIGHT(blob, n) becomes UNHEX(LEFT/RIGHT(HEX(blob), n * 2)) 3485 # Each byte becomes 2 hex chars, so multiply length by 2 3486 hex_arg = exp.Hex(this=arg) 3487 hex_length = exp.Mul(this=length, expression=exp.Literal.number(2)) 3488 result: exp.Expression = exp.Unhex( 3489 this=exp.Anonymous(this=func_name, expressions=[hex_arg, hex_length]) 3490 ) 3491 else: 3492 result = exp.Anonymous(this=func_name, expressions=[arg, length]) 3493 3494 if expression.args.get("negative_length_returns_empty"): 3495 empty: exp.Expression = exp.Literal.string("") 3496 if is_binary: 3497 empty = exp.Unhex(this=empty) 3498 result = exp.case().when(length < exp.Literal.number(0), empty).else_(result) 3499 3500 return self.sql(result) 3501 3502 def left_sql(self, expression: exp.Left) -> str: 3503 return self._left_right_sql(expression, "LEFT") 3504 3505 def right_sql(self, expression: exp.Right) -> str: 3506 return self._left_right_sql(expression, "RIGHT") 3507 3508 def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str: 3509 return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING")) 3510 3511 def stuff_sql(self, expression: exp.Stuff) -> str: 3512 base = expression.this 3513 start = expression.args["start"] 3514 length = expression.args["length"] 3515 insertion = expression.expression 3516 is_binary = _is_binary(base) 3517 3518 if is_binary: 3519 # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead 3520 # (each byte = 2 hex chars), then UNHEX back to BLOB 3521 base = exp.Hex(this=base) 3522 insertion = exp.Hex(this=insertion) 3523 left = exp.Substring( 3524 this=base.copy(), 3525 start=exp.Literal.number(1), 3526 length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2), 3527 ) 3528 right = exp.Substring( 3529 this=base.copy(), 3530 start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2) 3531 + exp.Literal.number(1), 3532 ) 3533 else: 3534 left = exp.Substring( 3535 this=base.copy(), 3536 start=exp.Literal.number(1), 3537 length=start.copy() - exp.Literal.number(1), 3538 ) 3539 right = exp.Substring(this=base.copy(), start=start + length) 3540 result: exp.Expr = exp.DPipe( 3541 this=exp.DPipe(this=left, expression=insertion), expression=right 3542 ) 3543 3544 if is_binary: 3545 result = exp.Unhex(this=result) 3546 3547 return self.sql(result) 3548 3549 def rand_sql(self, expression: exp.Rand) -> str: 3550 seed = expression.this 3551 if seed is not None: 3552 self.unsupported("RANDOM with seed is not supported in DuckDB") 3553 3554 lower = expression.args.get("lower") 3555 upper = expression.args.get("upper") 3556 3557 if lower and upper: 3558 # scale DuckDB's [0,1) to the specified range 3559 range_size = exp.paren(upper - lower) 3560 scaled = exp.Add(this=lower, expression=exp.func("random") * range_size) 3561 3562 # For now we assume that if bounds are set, return type is BIGINT. 
Snowflake/Teradata return integer results for bounded random expressions, so we match that. 3563 result = exp.cast(scaled, exp.DType.BIGINT) 3564 return self.sql(result) 3565 3566 # Default DuckDB behavior - just return RANDOM() as float 3567 return "RANDOM()" 3568 3569 def bytelength_sql(self, expression: exp.ByteLength) -> str: 3570 arg = expression.this 3571 3572 # Check if it's a text type (handles both literals and annotated expressions) 3573 if arg.is_type(*exp.DataType.TEXT_TYPES): 3574 return self.func("OCTET_LENGTH", exp.Encode(this=arg)) 3575 3576 # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated) 3577 return self.func("OCTET_LENGTH", arg) 3578 3579 def base64encode_sql(self, expression: exp.Base64Encode) -> str: 3580 # DuckDB TO_BASE64 requires BLOB input 3581 # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly 3582 # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type. 3583 result = expression.this 3584 3585 # Check if input is a string type - ENCODE only accepts VARCHAR 3586 if result.is_type(*exp.DataType.TEXT_TYPES): 3587 result = exp.Encode(this=result) 3588 3589 result = exp.ToBase64(this=result) 3590 3591 max_line_length = expression.args.get("max_line_length") 3592 alphabet = expression.args.get("alphabet") 3593 3594 # Handle custom alphabet by replacing standard chars with custom ones 3595 result = _apply_base64_alphabet_replacements(result, alphabet) 3596 3597 # Handle max_line_length by inserting newlines every N characters 3598 line_length = ( 3599 t.cast(int, max_line_length.to_py()) 3600 if isinstance(max_line_length, exp.Literal) and max_line_length.is_number 3601 else 0 3602 ) 3603 if line_length > 0: 3604 newline = exp.Chr(expressions=[exp.Literal.number(10)]) 3605 result = exp.Trim( 3606 this=exp.RegexpReplace( 3607 this=result, 3608 expression=exp.Literal.string(f"(.{{{line_length}}})"), 3609 replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]), 3610 ), 3611 expression=newline, 3612 position="TRAILING", 3613 ) 3614 3615 return self.sql(result) 3616 3617 def hex_sql(self, expression: exp.Hex) -> str: 3618 case = expression.args.get("case") 3619 3620 if not case: 3621 return self.func("HEX", expression.this) 3622 3623 hex_expr = exp.Hex(this=expression.this) 3624 return self.sql( 3625 exp.case() 3626 .when(case.is_(exp.null()), exp.null()) 3627 .when(case.copy().eq(0), exp.Lower(this=hex_expr.copy())) 3628 .else_(hex_expr) 3629 ) 3630 3631 def replace_sql(self, expression: exp.Replace) -> str: 3632 result_sql = self.func( 3633 "REPLACE", 3634 _cast_to_varchar(expression.this), 3635 _cast_to_varchar(expression.expression), 3636 _cast_to_varchar(expression.args.get("replacement")), 3637 ) 3638 return _gen_with_cast_to_blob(self, expression, result_sql) 3639 3640 def _bitwise_op(self, expression: exp.Binary, op: str) -> str: 3641 _prepare_binary_bitwise_args(expression) 3642 result_sql = self.binary(expression, op) 3643 return _gen_with_cast_to_blob(self, expression, result_sql) 3644 3645 def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str: 3646 _prepare_binary_bitwise_args(expression) 3647 result_sql = self.func("XOR", expression.this, expression.expression) 3648 return _gen_with_cast_to_blob(self, expression, result_sql) 3649 3650 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 3651 this = expression.this 3652 key = expression.args.get("key") 3653 key_sql = key.name if isinstance(key, exp.Expr) else "" 3654 value_sql = self.sql(expression, "value") 3655 3656 kv_sql = f"{key_sql} := {value_sql}"
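        # kv_sql is a named-argument fragment in DuckDB's struct syntax, e.g. `b := 2`
        # (the key name comes from key.name, the value from the generated value SQL)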
3657 3658 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 3659 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 3660 if isinstance(this, exp.Struct) and not this.expressions: 3661 return self.func("STRUCT_PACK", kv_sql) 3662 3663 return self.func("STRUCT_INSERT", this, kv_sql) 3664 3665 def mapcat_sql(self, expression: exp.MapCat) -> str: 3666 result = exp.replace_placeholders( 3667 self.MAPCAT_TEMPLATE.copy(), 3668 map1=expression.this, 3669 map2=expression.expression, 3670 ) 3671 return self.sql(result) 3672 3673 def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str: 3674 return self.func( 3675 "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this 3676 ) 3677 3678 def mapdelete_sql(self, expression: exp.MapDelete) -> str: 3679 map_arg = expression.this 3680 keys_to_delete = expression.expressions 3681 3682 x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key")) 3683 3684 lambda_expr = exp.Lambda( 3685 this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(), 3686 expressions=[exp.to_identifier("x")], 3687 ) 3688 result = exp.func( 3689 "MAP_FROM_ENTRIES", 3690 exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr), 3691 ) 3692 return self.sql(result) 3693 3694 def mappick_sql(self, expression: exp.MapPick) -> str: 3695 map_arg = expression.this 3696 keys_to_pick = expression.expressions 3697 3698 x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key")) 3699 3700 if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY): 3701 lambda_expr = exp.Lambda( 3702 this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key), 3703 expressions=[exp.to_identifier("x")], 3704 ) 3705 else: 3706 lambda_expr = exp.Lambda( 3707 this=exp.In(this=x_dot_key, expressions=keys_to_pick), 3708 expressions=[exp.to_identifier("x")], 3709 ) 3710 3711 result = exp.func( 3712 "MAP_FROM_ENTRIES", 3713 exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr), 3714 ) 3715 return self.sql(result) 3716 3717 def mapsize_sql(self, expression: exp.MapSize) -> str: 3718 return self.func("CARDINALITY", expression.this) 3719 3720 @unsupported_args("update_flag") 3721 def mapinsert_sql(self, expression: exp.MapInsert) -> str: 3722 map_arg = expression.this 3723 key = expression.args.get("key") 3724 value = expression.args.get("value") 3725 3726 map_type = map_arg.type 3727 3728 if value is not None: 3729 if map_type and map_type.expressions and len(map_type.expressions) > 1: 3730 # Extract the value type from MAP(key_type, value_type) 3731 value_type = map_type.expressions[1] 3732 # Cast value to match the map's value type to avoid type conflicts 3733 value = exp.cast(value, value_type) 3734 # else: polymorphic MAP case - no type parameters available, use value as-is 3735 3736 # Create a single-entry map for the new key-value pair 3737 new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)]) 3738 new_entry: exp.Expression = exp.ToMap(this=new_entry_struct) 3739 3740 # Use MAP_CONCAT to merge the original map with the new entry 3741 # This automatically handles both insert and update cases 3742 result = exp.func("MAP_CONCAT", map_arg, new_entry) 3743 3744 return self.sql(result) 3745 3746 def startswith_sql(self, expression: exp.StartsWith) -> str: 3747 return self.func( 3748 "STARTS_WITH", 3749 
_cast_to_varchar(expression.this), 3750 _cast_to_varchar(expression.expression), 3751 ) 3752 3753 def space_sql(self, expression: exp.Space) -> str: 3754 # DuckDB's REPEAT requires BIGINT for the count parameter 3755 return self.sql( 3756 exp.Repeat( 3757 this=exp.Literal.string(" "), 3758 times=exp.cast(expression.this, exp.DType.BIGINT), 3759 ) 3760 ) 3761 3762 def tablefromrows_sql(self, expression: exp.TableFromRows) -> str: 3763 # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE) 3764 if isinstance(expression.this, exp.Generator): 3765 # Preserve alias, joins, and other table-level args 3766 table = exp.Table( 3767 this=expression.this, 3768 alias=expression.args.get("alias"), 3769 joins=expression.args.get("joins"), 3770 ) 3771 return self.sql(table) 3772 3773 return super().tablefromrows_sql(expression) 3774 3775 def unnest_sql(self, expression: exp.Unnest) -> str: 3776 explode_array = expression.args.get("explode_array") 3777 if explode_array: 3778 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 3779 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 3780 expression.expressions.append( 3781 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 3782 ) 3783 3784 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 3785 alias = expression.args.get("alias") 3786 if isinstance(alias, exp.TableAlias): 3787 expression.set("alias", None) 3788 if alias.columns: 3789 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 3790 3791 unnest_sql = super().unnest_sql(expression) 3792 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 3793 return self.sql(select) 3794 3795 return super().unnest_sql(expression) 3796 3797 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 3798 this = expression.this 3799 3800 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 3801 # DuckDB should render IGNORE NULLS only for the general-purpose 3802 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 
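            # Since IGNORE_NULLS_IN_FUNC is enabled for this generator, the modifier is
            # emitted inside the call, e.g. LEAD(x IGNORE NULLS) OVER (ORDER BY y)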
3803 return super().ignorenulls_sql(expression) 3804 3805 if isinstance(this, exp.First): 3806 this = exp.AnyValue(this=this.this) 3807 3808 if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)): 3809 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 3810 3811 return self.sql(this) 3812 3813 def split_sql(self, expression: exp.Split) -> str: 3814 base_func = exp.func("STR_SPLIT", expression.this, expression.expression) 3815 3816 case_expr = exp.case().else_(base_func) 3817 needs_case = False 3818 3819 if expression.args.get("null_returns_null"): 3820 case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null()) 3821 needs_case = True 3822 3823 if expression.args.get("empty_delimiter_returns_whole"): 3824 # When delimiter is empty string, return input string as single array element 3825 array_with_input = exp.array(expression.this) 3826 case_expr = case_expr.when( 3827 expression.expression.eq(exp.Literal.string("")), array_with_input 3828 ) 3829 needs_case = True 3830 3831 return self.sql(case_expr if needs_case else base_func) 3832 3833 def splitpart_sql(self, expression: exp.SplitPart) -> str: 3834 string_arg = expression.this 3835 delimiter_arg = expression.args.get("delimiter") 3836 part_index_arg = expression.args.get("part_index") 3837 3838 if delimiter_arg and part_index_arg: 3839 # Handle Snowflake's "index 0 and 1 both return first element" behavior 3840 if expression.args.get("part_index_zero_as_one"): 3841 # Convert 0 to 1 for compatibility 3842 3843 part_index_arg = exp.Paren( 3844 this=exp.case() 3845 .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1")) 3846 .else_(part_index_arg) 3847 ) 3848 3849 # Use Anonymous to avoid recursion 3850 base_func_expr: exp.Expr = exp.Anonymous( 3851 this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg] 3852 ) 3853 needs_case_transform = False 3854 case_expr = exp.case().else_(base_func_expr) 3855 3856 if expression.args.get("empty_delimiter_returns_whole"): 3857 # When delimiter is empty string: 3858 # - Return whole string if part_index is 1 or -1 3859 # - Return empty string otherwise 3860 empty_case = exp.Paren( 3861 this=exp.case() 3862 .when( 3863 exp.or_( 3864 part_index_arg.eq(exp.Literal.number("1")), 3865 part_index_arg.eq(exp.Literal.number("-1")), 3866 ), 3867 string_arg, 3868 ) 3869 .else_(exp.Literal.string("")) 3870 ) 3871 3872 case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case) 3873 needs_case_transform = True 3874 3875 """ 3876 Output looks something like this: 3877 3878 CASE 3879 WHEN delimiter is '' THEN 3880 ( 3881 CASE 3882 WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input 3883 ELSE '' END 3884 ) 3885 ELSE SPLIT_PART(input, delimiter, adjusted_part_index) 3886 END 3887 3888 """ 3889 return self.sql(case_expr if needs_case_transform else base_func_expr) 3890 3891 return self.function_fallback_sql(expression) 3892 3893 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 3894 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 3895 # DuckDB should render RESPECT NULLS only for the general-purpose 3896 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
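            # e.g. NTH_VALUE(x, 2 RESPECT NULLS) OVER (...) is preserved as-is; for other
            # functions the modifier is dropped below with a warning, since RESPECT NULLS
            # is the default null handling anyway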
3897 return super().respectnulls_sql(expression) 3898 3899 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 3900 return self.sql(expression, "this") 3901 3902 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 3903 null = expression.args.get("null") 3904 3905 if expression.args.get("null_is_empty"): 3906 x = exp.to_identifier("x") 3907 list_transform = exp.Transform( 3908 this=expression.this.copy(), 3909 expression=exp.Lambda( 3910 this=exp.Coalesce( 3911 this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")] 3912 ), 3913 expressions=[x], 3914 ), 3915 ) 3916 array_to_string = exp.ArrayToString( 3917 this=list_transform, expression=expression.expression 3918 ) 3919 if expression.args.get("null_delim_is_null"): 3920 return self.sql( 3921 exp.case() 3922 .when(expression.expression.copy().is_(exp.null()), exp.null()) 3923 .else_(array_to_string) 3924 ) 3925 return self.sql(array_to_string) 3926 3927 if null: 3928 x = exp.to_identifier("x") 3929 return self.sql( 3930 exp.ArrayToString( 3931 this=exp.Transform( 3932 this=expression.this, 3933 expression=exp.Lambda( 3934 this=exp.Coalesce(this=x, expressions=[null]), 3935 expressions=[x], 3936 ), 3937 ), 3938 expression=expression.expression, 3939 ) 3940 ) 3941 3942 return self.func("ARRAY_TO_STRING", expression.this, expression.expression) 3943 3944 def concatws_sql(self, expression: exp.ConcatWs) -> str: 3945 # DuckDB-specific: handle binary types using DPipe (||) operator 3946 separator = seq_get(expression.expressions, 0) 3947 args = expression.expressions[1:] 3948 3949 if any(_is_binary(arg) for arg in [separator, *args]): 3950 result = args[0] 3951 for arg in args[1:]: 3952 result = exp.DPipe( 3953 this=exp.DPipe(this=result, expression=separator), expression=arg 3954 ) 3955 return self.sql(result) 3956 3957 return super().concatws_sql(expression) 3958 3959 def _regexp_extract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 3960 this = expression.this 3961 group = expression.args.get("group") 3962 params = expression.args.get("parameters") 3963 position = expression.args.get("position") 3964 occurrence = expression.args.get("occurrence") 3965 null_if_pos_overflow = expression.args.get("null_if_pos_overflow") 3966 3967 # Handle Snowflake's 'e' flag: it enables capture group extraction 3968 # In DuckDB, this is controlled by the group parameter directly 3969 if params and params.is_string and "e" in params.name: 3970 params = exp.Literal.string(params.name.replace("e", "")) 3971 3972 validated_flags = self._validate_regexp_flags(params, supported_flags="cims") 3973 3974 # Strip default group when no following params (DuckDB default is same as group=0) 3975 if ( 3976 not validated_flags 3977 and group 3978 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 3979 ): 3980 group = None 3981 3982 flags_expr = exp.Literal.string(validated_flags) if validated_flags else None 3983 3984 # use substring to handle position argument 3985 if position and (not position.is_int or position.to_py() > 1): 3986 this = exp.Substring(this=this, start=position) 3987 3988 if null_if_pos_overflow: 3989 this = exp.Nullif(this=this, expression=exp.Literal.string("")) 3990 3991 is_extract_all = isinstance(expression, exp.RegexpExtractAll) 3992 non_single_occurrence = occurrence and (not occurrence.is_int or occurrence.to_py() > 1) 3993 3994 if is_extract_all or non_single_occurrence: 3995 name = "REGEXP_EXTRACT_ALL" 3996 else: 3997 name = "REGEXP_EXTRACT" 3998 3999 result: 
exp.Expr = exp.Anonymous( 4000 this=name, expressions=[this, expression.expression, group, flags_expr] 4001 ) 4002 4003 # Array slicing for REGEXP_EXTRACT_ALL with occurrence 4004 if is_extract_all and non_single_occurrence: 4005 result = exp.Bracket(this=result, expressions=[exp.Slice(this=occurrence)]) 4006 # ARRAY_EXTRACT for REGEXP_EXTRACT with occurrence > 1 4007 elif non_single_occurrence: 4008 result = exp.Anonymous(this="ARRAY_EXTRACT", expressions=[result, occurrence]) 4009 4010 return self.sql(result) 4011 4012 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 4013 return self._regexp_extract_sql(expression) 4014 4015 def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str: 4016 return self._regexp_extract_sql(expression) 4017 4018 def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str: 4019 this = expression.this 4020 pattern = expression.expression 4021 position = expression.args.get("position") 4022 orig_occ = expression.args.get("occurrence") 4023 occurrence = orig_occ or exp.Literal.number(1) 4024 option = expression.args.get("option") 4025 parameters = expression.args.get("parameters") 4026 4027 validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims") 4028 if validated_flags: 4029 pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern]) 4030 4031 # Handle starting position offset 4032 pos_offset: exp.Expr = exp.Literal.number(0) 4033 if position and (not position.is_int or position.to_py() > 1): 4034 this = exp.Substring(this=this, start=position) 4035 pos_offset = position - exp.Literal.number(1) 4036 4037 # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x))) 4038 def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr: 4039 lst = exp.Bracket( 4040 this=exp.Anonymous(this=func_name, expressions=[this, pattern]), 4041 expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)], 4042 offset=1, 4043 ) 4044 transform = exp.Anonymous( 4045 this="LIST_TRANSFORM", 4046 expressions=[ 4047 lst, 4048 exp.Lambda( 4049 this=exp.Length(this=exp.to_identifier("x")), 4050 expressions=[exp.to_identifier("x")], 4051 ), 4052 ], 4053 ) 4054 return exp.Coalesce( 4055 this=exp.Anonymous(this="LIST_SUM", expressions=[transform]), 4056 expressions=[exp.Literal.number(0)], 4057 ) 4058 4059 # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset 4060 base_pos: exp.Expr = ( 4061 exp.Literal.number(1) 4062 + sum_lengths("STRING_SPLIT_REGEX", occurrence) 4063 + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1)) 4064 + pos_offset 4065 ) 4066 4067 # option=1: add match length for end position 4068 if option and option.is_int and option.to_py() == 1: 4069 match_at_occ = exp.Bracket( 4070 this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]), 4071 expressions=[occurrence], 4072 offset=1, 4073 ) 4074 base_pos = base_pos + exp.Coalesce( 4075 this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)] 4076 ) 4077 4078 # NULL checks for all provided arguments 4079 # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST 4080 null_args = [ 4081 expression.this, 4082 expression.expression, 4083 position, 4084 orig_occ, 4085 option, 4086 parameters, 4087 ] 4088 null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg] 4089 4090 matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]) 4091 4092 return self.sql( 4093 exp.case() 4094 
.when(exp.or_(*null_checks), exp.Null()) 4095 .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0)) 4096 .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0)) 4097 .else_(base_pos) 4098 ) 4099 4100 @unsupported_args("culture") 4101 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 4102 fmt = expression.args.get("format") 4103 if fmt and fmt.is_int: 4104 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 4105 4106 self.unsupported("Only integer formats are supported by NumberToStr") 4107 return self.function_fallback_sql(expression) 4108 4109 def autoincrementcolumnconstraint_sql(self, _) -> str: 4110 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 4111 return "" 4112 4113 def aliases_sql(self, expression: exp.Aliases) -> str: 4114 this = expression.this 4115 if isinstance(this, exp.Posexplode): 4116 return self.posexplode_sql(this) 4117 4118 return super().aliases_sql(expression) 4119 4120 def posexplode_sql(self, expression: exp.Posexplode) -> str: 4121 this = expression.this 4122 parent = expression.parent 4123 4124 # The default Spark aliases are "pos" and "col", unless specified otherwise 4125 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 4126 4127 if isinstance(parent, exp.Aliases): 4128 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 4129 pos, col = parent.expressions 4130 elif isinstance(parent, exp.Table): 4131 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 4132 alias = parent.args.get("alias") 4133 if alias: 4134 pos, col = alias.columns or [pos, col] 4135 alias.pop() 4136 4137 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 4138 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 4139 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 4140 gen_subscripts = self.sql( 4141 exp.Alias( 4142 this=exp.Anonymous( 4143 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 4144 ) 4145 - exp.Literal.number(1), 4146 alias=pos, 4147 ) 4148 ) 4149 4150 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 4151 4152 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 4153 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 4154 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 4155 4156 return posexplode_sql 4157 4158 def addmonths_sql(self, expression: exp.AddMonths) -> str: 4159 """ 4160 Handles three key issues: 4161 1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers 4162 2. End-of-month preservation: If input is last day of month, result is last day of result month 4163 3. 
Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP) 4164 """ 4165 from sqlglot.optimizer.annotate_types import annotate_types 4166 4167 this = expression.this 4168 if not this.type: 4169 this = annotate_types(this, dialect=self.dialect) 4170 4171 if this.is_type(*exp.DataType.TEXT_TYPES): 4172 this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP)) 4173 4174 # Detect float/decimal months to apply rounding (Snowflake behavior) 4175 # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS 4176 months_expr = expression.expression 4177 if not months_expr.type: 4178 months_expr = annotate_types(months_expr, dialect=self.dialect) 4179 4180 # Build interval or to_months expression based on type 4181 # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT)) 4182 interval_or_to_months = ( 4183 exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT")) 4184 if months_expr.is_type( 4185 exp.DType.FLOAT, 4186 exp.DType.DOUBLE, 4187 exp.DType.DECIMAL, 4188 ) 4189 # Integer case: standard INTERVAL N MONTH syntax 4190 else exp.Interval(this=months_expr, unit=exp.var("MONTH")) 4191 ) 4192 4193 date_add_expr = exp.Add(this=this, expression=interval_or_to_months) 4194 4195 # Apply end-of-month preservation if Snowflake flag is set 4196 # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END 4197 preserve_eom = expression.args.get("preserve_end_of_month") 4198 result_expr = ( 4199 exp.case() 4200 .when( 4201 exp.EQ(this=exp.func("LAST_DAY", this), expression=this), 4202 exp.func("LAST_DAY", date_add_expr), 4203 ) 4204 .else_(date_add_expr) 4205 if preserve_eom 4206 else date_add_expr 4207 ) 4208 4209 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 4210 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 4211 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 4212 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 4213 if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ): 4214 return self.sql(exp.Cast(this=result_expr, to=this.type)) 4215 return self.sql(result_expr) 4216 4217 def format_sql(self, expression: exp.Format) -> str: 4218 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 4219 return self.func("FORMAT", "'{}'", expression.expressions[0]) 4220 4221 return self.function_fallback_sql(expression) 4222 4223 def hexstring_sql( 4224 self, expression: exp.HexString, binary_function_repr: str | None = None 4225 ) -> str: 4226 # UNHEX('FF') correctly produces blob \xFF in DuckDB 4227 return super().hexstring_sql(expression, binary_function_repr="UNHEX") 4228 4229 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 4230 unit = expression.args.get("unit") 4231 date = expression.this 4232 4233 week_start = _week_unit_to_dow(unit) 4234 unit = unit_to_str(expression) 4235 4236 if week_start: 4237 result = self.sql( 4238 _build_week_trunc_expression(date, week_start, preserve_start_day=True) 4239 ) 4240 else: 4241 result = self.func("DATE_TRUNC", unit, date) 4242 4243 if ( 4244 expression.args.get("input_type_preserved") 4245 and date.is_type(*exp.DataType.TEMPORAL_TYPES) 4246 and not (is_date_unit(unit) and date.is_type(exp.DType.DATE)) 4247 ): 4248 return self.sql(exp.Cast(this=result, to=date.type)) 4249 4250 return result 4251 4252 def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str: 4253 unit = 
unit_to_str(expression) 4254 zone = expression.args.get("zone") 4255 timestamp = expression.this 4256 date_unit = is_date_unit(unit) 4257 4258 if date_unit and zone: 4259 # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC. 4260 # Double AT TIME ZONE needed for BigQuery compatibility: 4261 # 1. First AT TIME ZONE: ensures truncation happens in the target timezone 4262 # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component) 4263 timestamp = exp.AtTimeZone(this=timestamp, zone=zone) 4264 result_sql = self.func("DATE_TRUNC", unit, timestamp) 4265 return self.sql(exp.AtTimeZone(this=result_sql, zone=zone)) 4266 4267 result = self.func("DATE_TRUNC", unit, timestamp) 4268 if expression.args.get("input_type_preserved"): 4269 if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ): 4270 dummy_date = exp.Cast( 4271 this=exp.Literal.string("1970-01-01"), 4272 to=exp.DataType(this=exp.DType.DATE), 4273 ) 4274 date_time = exp.Add(this=dummy_date, expression=timestamp) 4275 result = self.func("DATE_TRUNC", unit, date_time) 4276 return self.sql(exp.Cast(this=result, to=timestamp.type)) 4277 4278 if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not ( 4279 date_unit and timestamp.is_type(exp.DType.DATE) 4280 ): 4281 return self.sql(exp.Cast(this=result, to=timestamp.type)) 4282 4283 return result 4284 4285 def trim_sql(self, expression: exp.Trim) -> str: 4286 expression.this.replace(_cast_to_varchar(expression.this)) 4287 if expression.expression: 4288 expression.expression.replace(_cast_to_varchar(expression.expression)) 4289 4290 result_sql = super().trim_sql(expression) 4291 return _gen_with_cast_to_blob(self, expression, result_sql) 4292 4293 def round_sql(self, expression: exp.Round) -> str: 4294 this = expression.this 4295 decimals = expression.args.get("decimals") 4296 truncate = expression.args.get("truncate") 4297 4298 # DuckDB requires the scale (decimals) argument to be an INT 4299 # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally 4300 if decimals is not None and expression.args.get("casts_non_integer_decimals"): 4301 if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)): 4302 decimals = exp.cast(decimals, exp.DType.INT) 4303 4304 func = "ROUND" 4305 if truncate: 4306 # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN 4307 if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"): 4308 func = "ROUND_EVEN" 4309 truncate = None 4310 # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO 4311 elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"): 4312 truncate = None 4313 4314 return self.func(func, this, decimals, truncate) 4315 4316 def strtok_sql(self, expression: exp.Strtok) -> str: 4317 string_arg = expression.this 4318 delimiter_arg = expression.args.get("delimiter") 4319 part_index_arg = expression.args.get("part_index") 4320 4321 if delimiter_arg and part_index_arg: 4322 # Escape regex chars and build character class at runtime using REGEXP_REPLACE 4323 escaped_delimiter = exp.Anonymous( 4324 this="REGEXP_REPLACE", 4325 expressions=[ 4326 delimiter_arg, 4327 exp.Literal.string( 4328 r"([\[\]^.\-*+?(){}|$\\])" 4329 ), # Escape problematic regex chars 4330 exp.Literal.string( 4331 r"\\\1" 4332 ), # Replace with escaped version using \1 backreference 4333 exp.Literal.string("g"), # Global flag 4334 ], 4335 ) 4336 # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', 
escaped_delimiter, ']') END 4337 regex_pattern = ( 4338 exp.case() 4339 .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string("")) 4340 .else_( 4341 exp.func( 4342 "CONCAT", 4343 exp.Literal.string("["), 4344 escaped_delimiter, 4345 exp.Literal.string("]"), 4346 ) 4347 ) 4348 ) 4349 4350 # STRTOK skips empty strings, so we need to filter them out 4351 # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index] 4352 split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern) 4353 x = exp.to_identifier("x") 4354 is_empty = x.eq(exp.Literal.string("")) 4355 filtered_array = exp.func( 4356 "LIST_FILTER", 4357 split_array, 4358 exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]), 4359 ) 4360 base_func = exp.Bracket( 4361 this=filtered_array, 4362 expressions=[part_index_arg], 4363 offset=1, 4364 ) 4365 4366 # Use template with the built regex pattern 4367 result = exp.replace_placeholders( 4368 self.STRTOK_TEMPLATE.copy(), 4369 string=string_arg, 4370 delimiter=delimiter_arg, 4371 part_index=part_index_arg, 4372 base_func=base_func, 4373 ) 4374 4375 return self.sql(result) 4376 4377 return self.function_fallback_sql(expression) 4378 4379 def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str: 4380 string_arg = expression.this 4381 delimiter_arg = expression.args.get("expression") or exp.Literal.string(" ") 4382 4383 escaped = exp.RegexpReplace( 4384 this=delimiter_arg.copy(), 4385 expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"), 4386 replacement=exp.Literal.string(r"\\\1"), 4387 modifiers=exp.Literal.string("g"), 4388 ) 4389 return self.sql( 4390 exp.replace_placeholders( 4391 self.STRTOK_TO_ARRAY_TEMPLATE.copy(), 4392 string=string_arg, 4393 delimiter=delimiter_arg, 4394 escaped=escaped, 4395 ) 4396 ) 4397 4398 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 4399 result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile")) 4400 4401 # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type 4402 if expression.is_type(*exp.DataType.REAL_TYPES): 4403 result = f"CAST({result} AS DOUBLE)" 4404 4405 return result 4406 4407 def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str: 4408 """ 4409 BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values 4410 dividing the input distribution into n equal-sized buckets. 4411 4412 Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery 4413 does not document the specific algorithm used so results may differ. DuckDB does not 4414 support RESPECT NULLS. 
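        Illustrative mapping: APPROX_QUANTILES(x, 4) is rewritten to the DuckDB
        equivalent of APPROX_QUANTILE(x, [0, 0.25, 0.5, 0.75, 1]).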
4415 """ 4416 this = expression.this 4417 if isinstance(this, exp.Distinct): 4418 # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both 4419 if len(this.expressions) < 2: 4420 self.unsupported("APPROX_QUANTILES requires a bucket count argument") 4421 return self.function_fallback_sql(expression) 4422 num_quantiles_expr = this.expressions[1].pop() 4423 else: 4424 num_quantiles_expr = expression.expression 4425 4426 if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int: 4427 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 4428 return self.function_fallback_sql(expression) 4429 4430 num_quantiles = t.cast(int, num_quantiles_expr.to_py()) 4431 if num_quantiles <= 0: 4432 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 4433 return self.function_fallback_sql(expression) 4434 4435 quantiles = [ 4436 exp.Literal.number(Decimal(i) / Decimal(num_quantiles)) 4437 for i in range(num_quantiles + 1) 4438 ] 4439 4440 return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))) 4441 4442 def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str: 4443 if expression.args.get("scalar_only"): 4444 expression = exp.JSONExtractScalar( 4445 this=rename_func("JSON_VALUE")(self, expression), expression="'$'" 4446 ) 4447 return _arrow_json_extract_sql(self, expression) 4448 4449 def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str: 4450 this = expression.this 4451 4452 if _is_binary(this): 4453 expression.type = exp.DType.BINARY.into_expr() 4454 4455 arg = _cast_to_bit(this) 4456 4457 if isinstance(this, exp.Neg): 4458 arg = exp.Paren(this=arg) 4459 4460 expression.set("this", arg) 4461 4462 result_sql = f"~{self.sql(expression, 'this')}" 4463 4464 return _gen_with_cast_to_blob(self, expression, result_sql) 4465 4466 def window_sql(self, expression: exp.Window) -> str: 4467 this = expression.this 4468 if isinstance(this, exp.Corr) or ( 4469 isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr) 4470 ): 4471 return self._corr_sql(expression) 4472 4473 return super().window_sql(expression) 4474 4475 def filter_sql(self, expression: exp.Filter) -> str: 4476 if isinstance(expression.this, exp.Corr): 4477 return self._corr_sql(expression) 4478 4479 return super().filter_sql(expression) 4480 4481 def _corr_sql( 4482 self, 4483 expression: exp.Filter | exp.Window | exp.Corr, 4484 ) -> str: 4485 if isinstance(expression, exp.Corr) and not expression.args.get("null_on_zero_variance"): 4486 return self.func("CORR", expression.this, expression.expression) 4487 4488 corr_expr = _maybe_corr_null_to_false(expression) 4489 if corr_expr is None: 4490 if isinstance(expression, exp.Window): 4491 return super().window_sql(expression) 4492 if isinstance(expression, exp.Filter): 4493 return super().filter_sql(expression) 4494 corr_expr = expression # make mypy happy 4495 4496 return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr)) 4497 4498 def uuid_sql(self, expression: exp.Uuid) -> str: 4499 namespace = expression.this 4500 name = expression.args.get("name") 4501 4502 # UUID v5 (namespace + name) - Emulate using SHA1 4503 if namespace and name: 4504 result = exp.replace_placeholders( 4505 self.UUID_V5_TEMPLATE.copy(), 4506 namespace=namespace, 4507 name=name, 4508 ) 4509 return self.sql(result) 4510 4511 return super().uuid_sql(expression)
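# Note on uuid_sql above: the namespace + name form corresponds to RFC 4122 version-5
# UUIDs (e.g. Snowflake's UUID_STRING(ns, name)); UUID_V5_TEMPLATE emulates this via
# SHA1, while a bare UUID falls through to the parent generator's random UUID.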
1456class DuckDBGenerator(generator.Generator): 1457 PARAMETER_TOKEN = "$" 1458 NAMED_PLACEHOLDER_TOKEN = "$" 1459 JOIN_HINTS = False 1460 TABLE_HINTS = False 1461 QUERY_HINTS = False 1462 LIMIT_FETCH = "LIMIT" 1463 STRUCT_DELIMITER = ("(", ")") 1464 RENAME_TABLE_WITH_DB = False 1465 NVL2_SUPPORTED = False 1466 SEMI_ANTI_JOIN_WITH_SIDE = False 1467 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 1468 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 1469 LAST_DAY_SUPPORTS_DATE_PART = False 1470 JSON_KEY_VALUE_PAIR_SEP = "," 1471 IGNORE_NULLS_IN_FUNC = True 1472 IGNORE_NULLS_BEFORE_ORDER = False 1473 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 1474 SUPPORTS_CREATE_TABLE_LIKE = False 1475 MULTI_ARG_DISTINCT = False 1476 CAN_IMPLEMENT_ARRAY_ANY = True 1477 SUPPORTS_TO_NUMBER = False 1478 SELECT_KINDS: tuple[str, ...] = () 1479 SUPPORTS_DECODE_CASE = False 1480 SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False 1481 1482 AFTER_HAVING_MODIFIER_TRANSFORMS = generator.AFTER_HAVING_MODIFIER_TRANSFORMS 1483 SUPPORTS_WINDOW_EXCLUDE = True 1484 COPY_HAS_INTO_KEYWORD = False 1485 STAR_EXCEPT = "EXCLUDE" 1486 PAD_FILL_PATTERN_IS_REQUIRED = True 1487 ARRAY_SIZE_DIM_REQUIRED: bool | None = False 1488 NORMALIZE_EXTRACT_DATE_PARTS = True 1489 SUPPORTS_LIKE_QUANTIFIERS = False 1490 SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True 1491 1492 TRANSFORMS = { 1493 **generator.Generator.TRANSFORMS, 1494 exp.AnyValue: _anyvalue_sql, 1495 exp.ApproxDistinct: approx_count_distinct_sql, 1496 exp.Boolnot: _boolnot_sql, 1497 exp.Booland: _booland_sql, 1498 exp.Boolor: _boolor_sql, 1499 exp.Array: transforms.preprocess( 1500 [transforms.inherit_struct_field_names], 1501 generator=inline_array_unless_query, 1502 ), 1503 exp.ArrayAppend: array_append_sql("LIST_APPEND"), 1504 exp.ArrayCompact: array_compact_sql, 1505 exp.ArrayConstructCompact: lambda self, e: self.sql( 1506 exp.ArrayCompact(this=exp.Array(expressions=e.expressions)) 1507 ), 1508 exp.ArrayConcat: array_concat_sql("LIST_CONCAT"), 1509 exp.ArrayContains: _array_contains_sql, 1510 exp.ArrayOverlaps: _array_overlaps_sql, 1511 exp.ArrayFilter: rename_func("LIST_FILTER"), 1512 exp.ArrayInsert: _array_insert_sql, 1513 exp.ArrayPosition: lambda self, e: ( 1514 self.sql( 1515 exp.Sub( 1516 this=exp.ArrayPosition(this=e.this, expression=e.expression), 1517 expression=exp.Literal.number(1), 1518 ) 1519 ) 1520 if e.args.get("zero_based") 1521 else self.func("ARRAY_POSITION", e.this, e.expression) 1522 ), 1523 exp.ArrayRemoveAt: _array_remove_at_sql, 1524 exp.ArrayRemove: remove_from_array_using_filter, 1525 exp.ArraySort: _array_sort_sql, 1526 exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True), 1527 exp.ArraySum: rename_func("LIST_SUM"), 1528 exp.ArrayMax: rename_func("LIST_MAX"), 1529 exp.ArrayMin: rename_func("LIST_MIN"), 1530 exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False), 1531 exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True), 1532 exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"), 1533 exp.BitwiseAndAgg: _bitwise_agg_sql, 1534 exp.BitwiseCount: rename_func("BIT_COUNT"), 1535 exp.BitwiseLeftShift: _bitshift_sql, 1536 exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"), 1537 exp.BitwiseOrAgg: _bitwise_agg_sql, 1538 exp.BitwiseRightShift: _bitshift_sql, 1539 exp.BitwiseXorAgg: _bitwise_agg_sql, 1540 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 1541 exp.Corr: lambda self, e: self._corr_sql(e), 1542 exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"), 1543 exp.CurrentTime: 
lambda *_: "CURRENT_TIME", 1544 exp.CurrentSchemas: lambda self, e: self.func( 1545 "current_schemas", e.this if e.this else exp.true() 1546 ), 1547 exp.CurrentTimestamp: lambda self, e: ( 1548 self.sql( 1549 exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC")) 1550 ) 1551 if e.args.get("sysdate") 1552 else "CURRENT_TIMESTAMP" 1553 ), 1554 exp.CurrentVersion: rename_func("version"), 1555 exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"), 1556 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1557 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1558 exp.DayOfWeekIso: rename_func("ISODOW"), 1559 exp.DayOfYear: rename_func("DAYOFYEAR"), 1560 exp.Dayname: lambda self, e: ( 1561 self.func("STRFTIME", e.this, exp.Literal.string("%a")) 1562 if e.args.get("abbreviated") 1563 else self.func("DAYNAME", e.this) 1564 ), 1565 exp.Monthname: lambda self, e: ( 1566 self.func("STRFTIME", e.this, exp.Literal.string("%b")) 1567 if e.args.get("abbreviated") 1568 else self.func("MONTHNAME", e.this) 1569 ), 1570 exp.DataType: _datatype_sql, 1571 exp.Date: _date_sql, 1572 exp.DateAdd: _date_delta_to_binary_interval_op(), 1573 exp.DateFromParts: _date_from_parts_sql, 1574 exp.DateSub: _date_delta_to_binary_interval_op(), 1575 exp.DateDiff: _date_diff_sql, 1576 exp.DateStrToDate: datestrtodate_sql, 1577 exp.Datetime: no_datetime_sql, 1578 exp.DatetimeDiff: _date_diff_sql, 1579 exp.DatetimeSub: _date_delta_to_binary_interval_op(), 1580 exp.DatetimeAdd: _date_delta_to_binary_interval_op(), 1581 exp.DateToDi: lambda self, e: ( 1582 f"CAST(STRFTIME({self.sql(e, 'this')}, {self.dialect.DATEINT_FORMAT}) AS INT)" 1583 ), 1584 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 1585 exp.HexDecodeString: lambda self, e: self.sql(exp.Decode(this=exp.Unhex(this=e.this))), 1586 exp.DiToDate: lambda self, e: ( 1587 f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {self.dialect.DATEINT_FORMAT}) AS DATE)" 1588 ), 1589 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 1590 exp.EqualNull: lambda self, e: self.sql( 1591 exp.NullSafeEQ(this=e.this, expression=e.expression) 1592 ), 1593 exp.EuclideanDistance: rename_func("LIST_DISTANCE"), 1594 exp.GenerateDateArray: _generate_datetime_array_sql, 1595 exp.GenerateSeries: generate_series_sql("GENERATE_SERIES", "RANGE"), 1596 exp.GenerateTimestampArray: _generate_datetime_array_sql, 1597 exp.Getbit: getbit_sql, 1598 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 1599 exp.Explode: rename_func("UNNEST"), 1600 exp.IcebergProperty: lambda *_: "", 1601 exp.IntDiv: lambda self, e: self.binary(e, "//"), 1602 exp.IsInf: rename_func("ISINF"), 1603 exp.IsNan: rename_func("ISNAN"), 1604 exp.IsNullValue: lambda self, e: self.sql( 1605 exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL")) 1606 ), 1607 exp.IsArray: lambda self, e: self.sql( 1608 exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY")) 1609 ), 1610 exp.Ceil: _ceil_floor, 1611 exp.Floor: _ceil_floor, 1612 exp.JSONBExists: rename_func("JSON_EXISTS"), 1613 exp.JSONExtract: _arrow_json_extract_sql, 1614 exp.JSONExtractArray: _json_extract_value_array_sql, 1615 exp.JSONFormat: _json_format_sql, 1616 exp.JSONValueArray: _json_extract_value_array_sql, 1617 exp.Lateral: _explode_to_unnest_sql, 1618 exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)), 1619 exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)), 1620 exp.Select: 
transforms.preprocess([_seq_to_range_in_generator]), 1621 exp.Seq1: lambda self, e: _seq_sql(self, e, 1), 1622 exp.Seq2: lambda self, e: _seq_sql(self, e, 2), 1623 exp.Seq4: lambda self, e: _seq_sql(self, e, 4), 1624 exp.Seq8: lambda self, e: _seq_sql(self, e, 8), 1625 exp.BoolxorAgg: _boolxor_agg_sql, 1626 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 1627 exp.Initcap: _initcap_sql, 1628 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 1629 exp.SHA: lambda self, e: _sha_sql(self, e, "SHA1"), 1630 exp.SHA1Digest: lambda self, e: _sha_sql(self, e, "SHA1", is_binary=True), 1631 exp.SHA2: lambda self, e: _sha_sql(self, e, "SHA256"), 1632 exp.SHA2Digest: lambda self, e: _sha_sql(self, e, "SHA256", is_binary=True), 1633 exp.MonthsBetween: months_between_sql, 1634 exp.NextDay: _day_navigation_sql, 1635 exp.PercentileCont: rename_func("QUANTILE_CONT"), 1636 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 1637 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 1638 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 1639 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 1640 exp.PreviousDay: _day_navigation_sql, 1641 exp.RegexpILike: lambda self, e: self.func( 1642 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 1643 ), 1644 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 1645 exp.RegrValx: _regr_val_sql, 1646 exp.RegrValy: _regr_val_sql, 1647 exp.Return: lambda self, e: self.sql(e, "this"), 1648 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 1649 exp.StrToUnix: lambda self, e: self.func( 1650 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 1651 ), 1652 exp.Struct: _struct_sql, 1653 exp.Transform: rename_func("LIST_TRANSFORM"), 1654 exp.TimeAdd: _date_delta_to_binary_interval_op(), 1655 exp.TimeSub: _date_delta_to_binary_interval_op(), 1656 exp.Time: no_time_sql, 1657 exp.TimeDiff: _timediff_sql, 1658 exp.Timestamp: no_timestamp_sql, 1659 exp.TimestampAdd: _date_delta_to_binary_interval_op(), 1660 exp.TimestampDiff: lambda self, e: self.func( 1661 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 1662 ), 1663 exp.TimestampSub: _date_delta_to_binary_interval_op(), 1664 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)), 1665 exp.TimeStrToTime: timestrtotime_sql, 1666 exp.TimeStrToUnix: lambda self, e: self.func( 1667 "EPOCH", exp.cast(e.this, exp.DType.TIMESTAMP) 1668 ), 1669 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 1670 exp.ToBoolean: _to_boolean_sql, 1671 exp.ToVariant: lambda self, e: self.sql( 1672 exp.cast(e.this, exp.DataType.from_str("VARIANT", dialect="duckdb")) 1673 ), 1674 exp.TimeToUnix: rename_func("EPOCH"), 1675 exp.TsOrDiToDi: lambda self, e: ( 1676 f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)" 1677 ), 1678 exp.TsOrDsAdd: _date_delta_to_binary_interval_op(), 1679 exp.TsOrDsDiff: lambda self, e: self.func( 1680 "DATE_DIFF", 1681 f"'{e.args.get('unit') or 'DAY'}'", 1682 exp.cast(e.expression, exp.DType.TIMESTAMP), 1683 exp.cast(e.this, exp.DType.TIMESTAMP), 1684 ), 1685 exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)), 1686 exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)), 1687 exp.UnixSeconds: lambda self, e: self.sql( 1688 exp.cast(self.func("EPOCH", 
        exp.UnixSeconds: lambda self, e: self.sql(
            exp.cast(self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DType.BIGINT)
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DType.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.YearOfWeek: lambda self, e: self.sql(
            exp.Extract(
                this=exp.Var(this="ISOYEAR"),
                expression=e.this,
            )
        ),
        exp.YearOfWeekIso: lambda self, e: self.sql(
            exp.Extract(
                this=exp.Var(this="ISOYEAR"),
                expression=e.this,
            )
        ),
        exp.Xor: _xor_sql,
        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.DateBin: rename_func("TIME_BUCKET"),
        exp.LastDay: _last_day_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DType.BINARY: "BLOB",
        exp.DType.BPCHAR: "TEXT",
        exp.DType.CHAR: "TEXT",
        exp.DType.DATETIME: "TIMESTAMP",
        exp.DType.DECFLOAT: "DECIMAL",
        exp.DType.FLOAT: "REAL",
        exp.DType.JSONB: "JSON",
        exp.DType.NCHAR: "TEXT",
        exp.DType.NVARCHAR: "TEXT",
        exp.DType.UINT: "UINTEGER",
        exp.DType.VARBINARY: "BLOB",
        exp.DType.ROWVERSION: "BLOB",
        exp.DType.VARCHAR: "TEXT",
        exp.DType.TIMESTAMPLTZ: "TIMESTAMPTZ",
        exp.DType.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DType.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DType.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DType.TIMESTAMP_NS: "TIMESTAMP_NS",
        exp.DType.BIGDECIMAL: "DECIMAL",
    }

    TYPE_PARAM_SETTINGS = {
        **generator.Generator.TYPE_PARAM_SETTINGS,
        exp.DType.BIGDECIMAL: ((38, 5), (38, 38)),
        exp.DType.DECFLOAT: ((38, 5), (38, 38)),
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }
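    # Hedged examples of TYPE_MAPPING above: CAST(x AS NVARCHAR) and
    # CAST(x AS CHAR) both render as CAST(x AS TEXT), and DATETIME renders as
    # TIMESTAMP, since DuckDB collapses these to its native types.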
    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION = {
        **{
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        },
        exp.LikeProperty: exp.Properties.Location.POST_SCHEMA,
        exp.TemporaryProperty: exp.Properties.Location.POST_CREATE,
        exp.ReturnsProperty: exp.Properties.Location.POST_ALIAS,
        exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION,
        exp.IcebergProperty: exp.Properties.Location.POST_CREATE,
    }

    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: t.ClassVar = _IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS

    # Template for ZIPF transpilation - placeholders get replaced with actual parameters
    ZIPF_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        WITH rand AS (SELECT :random_expr AS r),
        weights AS (
            SELECT i, 1.0 / POWER(i, :s) AS w
            FROM RANGE(1, :n + 1) AS t(i)
        ),
        cdf AS (
            SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
            FROM weights
        )
        SELECT MIN(i)
        FROM cdf
        WHERE p >= (SELECT r FROM rand)
        """
    )

    # Template for NORMAL transpilation using Box-Muller transform
    # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
    NORMAL_TEMPLATE: exp.Expr = exp.maybe_parse(
        ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
    )

    # Template for generating a seeded pseudo-random value in [0, 1) from a hash
    SEEDED_RANDOM_TEMPLATE: exp.Expr = exp.maybe_parse("(ABS(HASH(:seed)) % 1000000) / 1000000.0")

    # Template for generating signed and unsigned SEQ values within a specified range
    SEQ_UNSIGNED: exp.Expr = _SEQ_UNSIGNED
    SEQ_SIGNED: exp.Expr = _SEQ_SIGNED

    # Template for MAP_CAT transpilation - Snowflake semantics:
    # 1. Returns NULL if either input is NULL
    # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k]))
    # 3. Filters out entries with NULL values from the result
    MAPCAT_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL
            ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(
                LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))),
                __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k]))
            ), __x -> __x.value IS NOT NULL))
        END
        """
    )

    # Mappings for EXTRACT/DATE_PART transpilation
    # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes
    EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {
        "WEEKISO": ("%V", "INTEGER"),
        "YEAROFWEEK": ("%G", "INTEGER"),
        "YEAROFWEEKISO": ("%G", "INTEGER"),
        "NANOSECOND": ("%n", "BIGINT"),
    }

    # Maps epoch-based specifiers to DuckDB epoch functions
    EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {
        "EPOCH_SECOND": "EPOCH",
        "EPOCH_MILLISECOND": "EPOCH_MS",
        "EPOCH_MICROSECOND": "EPOCH_US",
        "EPOCH_NANOSECOND": "EPOCH_NS",
    }
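    # Hedged illustration of the strftime mappings above (see extract_sql below
    # for the full lowering):
    #   EXTRACT(WEEKISO FROM d)    -> CAST(STRFTIME(d, '%V') AS INTEGER)
    #   EXTRACT(YEAROFWEEK FROM d) -> CAST(STRFTIME(d, '%G') AS INTEGER)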
    # Template for BITMAP_CONSTRUCT_AGG transpilation
    #
    # BACKGROUND:
    # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap.
    # Supports values in the range 0-32767; this version returns NULL if any value is out of range
    # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
    # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
    #
    # Snowflake uses two different formats based on the number of unique values:
    #
    # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
    #   Bytes 0-1: Count of values as 2-byte big-endian integer (e.g., 3 values = 0x0003)
    #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
    #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
    #                                 count v1   v2   v3   pad
    #
    # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
    #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
    #   Bytes 10+: Each value as 2-byte little-endian integer (no padding)
    #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
    #                                  hdr  ----9 zero bytes---- v1   v2   v3   v4   v5
    #
    # TEMPLATE STRUCTURE
    #
    # Phase 1 - Innermost subquery: Data preparation
    #   SELECT LIST_SORT(...) AS l
    #   - Aggregates all input values into a list; removes NULLs and duplicates, then sorts
    #   Result: Clean, sorted list of unique non-null integers stored as 'l'
    #
    # Phase 2 - Middle subquery: Hex string construction
    #   LIST_TRANSFORM(...)
    #   - Converts each integer to 2-byte little-endian hex representation
    #   - & 255 extracts low byte, >> 8 extracts high byte
    #   - LIST_REDUCE: Concatenates all hex pairs into single string 'h'
    #   Result: Hex string of all values
    #
    # Phase 3 - Outer SELECT: Final bitmap assembly
    #   LENGTH(l) < 5:
    #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
    #   LENGTH(l) >= 5:
    #   - Large format: Fixed 10-byte header + values (no padding needed)
    #   Result: Complete binary bitmap as BLOB
    #
    BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CASE
            WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
            WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
            WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
            ELSE UNHEX('08000000000000000000' || h)
        END
        FROM (
            SELECT l, COALESCE(LIST_REDUCE(
                LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
                (__a, __b) -> __a || __b, ''
            ), '') AS h
            FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
        )
        """
    )

    # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
    RANDSTR_TEMPLATE: exp.Expr = exp.maybe_parse(
        f"""
        SELECT LISTAGG(
            SUBSTRING(
                '{RANDSTR_CHAR_POOL}',
                1 + CAST(FLOOR(random_value * 62) AS INT),
                1
            ),
            ''
        )
        FROM (
            SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
            FROM RANGE(:length) AS t(i)
        )
        """,
    )
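    # Hedged sketch of the template above: RANDSTR(5, 42) becomes a scalar
    # subquery over RANGE(5), picking one character of the 62-character pool
    # per row via HASH(i + 42), so a fixed seed always yields the same string.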
    # Template for MINHASH transpilation
    # Computes k minimum hash values across aggregated data using DuckDB list functions
    # Returns JSON matching Snowflake format: {"state": [...], "type": "minhash", "version": 1}
    MINHASH_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h
            FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed)
        )
        """,
    )

    # Template for MINHASH_COMBINE transpilation
    # Combines multiple minhash signatures by taking element-wise minimum
    MINHASH_COMBINE_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT
                pos AS idx,
                MIN(val) AS min_h
            FROM
                UNNEST(LIST(:expr)) AS _(sig),
                UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos)
            GROUP BY pos
        )
        """,
    )

    # Template for APPROXIMATE_SIMILARITY transpilation
    # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree
    APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*)
        FROM (
            SELECT pos, COUNT(DISTINCT h) AS num_distinct
            FROM (
                SELECT h, pos
                FROM UNNEST(LIST(:expr)) AS _(sig),
                     UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos)
            )
            GROUP BY pos
        )
        """,
    )

    # Template for ARRAYS_ZIP transpilation
    # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest
    # Uses RANGE + indexing to match Snowflake behavior
    ARRAYS_ZIP_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :null_check THEN NULL
             WHEN :all_empty_check THEN [:empty_struct]
             ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct)
        END
        """,
    )

    UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        (SELECT
            LOWER(
                SUBSTR(h, 1, 8) || '-' ||
                SUBSTR(h, 9, 4) || '-' ||
                '5' || SUBSTR(h, 14, 3) || '-' ||
                FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' ||
                SUBSTR(h, 21, 12)
            )
        FROM (
            SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h
        ))
        """
    )

    # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
    # Each element is paired with its 1-based position via LIST_ZIP, then filtered
    # by a comparison operator (supplied via :cond) that determines the operation:
    #   EXCEPT (>): keep the N-th occurrence only if N > count in arr2
    #     e.g. [2,2,2] EXCEPT [2,2] -> [2]
    #   INTERSECTION (<=): keep the N-th occurrence only if N <= count in arr2
    #     e.g. [2,2,2] INTERSECT [2,2] -> [2,2]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_BAG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_TRANSFORM(
                LIST_FILTER(
                    LIST_ZIP(:arr1, GENERATE_SERIES(1, LEN(:arr1))),
                    pair -> :cond
                ),
                pair -> pair[0]
            )
        END
        """
    )

    ARRAY_EXCEPT_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " > LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    ARRAY_INTERSECTION_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " <= LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    # Set semantics for ARRAY_EXCEPT. Deduplicates arr1 via LIST_DISTINCT, then
    # filters out any element that appears at least once in arr2.
    #   e.g. [1,1,2,3] EXCEPT [1] -> [2,3]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_EXCEPT_SET_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_FILTER(
                LIST_DISTINCT(:arr1),
                e -> LEN(LIST_FILTER(:arr2, x -> x IS NOT DISTINCT FROM e)) = 0
            )
        END
        """
    )

    STRTOK_TO_ARRAY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :delimiter IS NULL THEN NULL
        ELSE LIST_FILTER(
            REGEXP_SPLIT_TO_ARRAY(:string, CASE WHEN :delimiter = '' THEN '.^' ELSE CONCAT('[', :escaped, ']') END),
            x -> NOT x = ''
        ) END
        """
    )

    # Template for STRTOK function transpilation
    #
    # DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
    # We may need to adjust this if we want to support transpilation from other dialects.
    #
    # CASE
    #     -- Snowflake: empty delimiter + empty input string -> NULL
    #     WHEN delimiter = '' AND input_str = '' THEN NULL
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return input string if index is 1
    #     WHEN delimiter = '' AND index = 1 THEN input_str
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return NULL if index is not 1
    #     WHEN delimiter = '' THEN NULL
    #
    #     -- Snowflake: negative indices return NULL
    #     WHEN index < 0 THEN NULL
    #
    #     -- Snowflake: return NULL if any argument is NULL
    #     WHEN input_str IS NULL OR delimiter IS NULL OR index IS NULL THEN NULL
    #
    #     ELSE LIST_FILTER(
    #         REGEXP_SPLIT_TO_ARRAY(
    #             input_str,
    #             CASE
    #                 -- if delimiter is '', we don't want to surround it with '[' and ']' as '[]' is invalid for DuckDB
    #                 WHEN delimiter = '' THEN ''
    #
    #                 -- handle problematic regex characters in delimiter with REGEXP_REPLACE
    #                 -- turn delimiter into a regex char set, otherwise DuckDB will match in order, which we don't want
    #                 ELSE '[' || REGEXP_REPLACE(delimiter, problematic_char_set, '\\\1', 'g') || ']'
    #             END
    #         ),
    #
    #         -- Snowflake: don't return empty strings
    #         x -> NOT x = ''
    #     )[index]
    # END
    STRTOK_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :delimiter = '' AND :string = '' THEN NULL
            WHEN :delimiter = '' AND :part_index = 1 THEN :string
            WHEN :delimiter = '' THEN NULL
            WHEN :part_index < 0 THEN NULL
            WHEN :string IS NULL OR :delimiter IS NULL OR :part_index IS NULL THEN NULL
            ELSE :base_func
        END
        """
    )

    def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str:
        cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2))
        return self.sql(
            exp.replace_placeholders(self.ARRAY_BAG_TEMPLATE, arr1=arr1, arr2=arr2, cond=cond)
        )
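    # Hedged examples of the Snowflake STRTOK semantics encoded above:
    #   STRTOK('a.b.c', '.', 2) -> 'b'      (1-indexed; empty tokens are dropped)
    #   STRTOK('a.b.c', '', 1)  -> 'a.b.c'  (empty delimiter: whole input is one token)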
    def timeslice_sql(self, expression: exp.TimeSlice) -> str:
        """
        Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.

        Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
        DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr)

        For 'END' kind, add the interval to get the end of the slice.
        For DATE type with 'END', cast result back to DATE to preserve type.
        """
        date_expr = expression.this
        slice_length = expression.expression
        unit = expression.unit
        kind = expression.text("kind").upper()

        # Create INTERVAL expression: INTERVAL 'N' UNIT
        interval_expr = exp.Interval(this=slice_length, unit=unit)

        # Create base time_bucket expression
        time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)

        # Check if we need the end of the slice (default is start)
        if kind != "END":
            # For 'START', return time_bucket directly
            return self.sql(time_bucket_expr)

        # For 'END', add the interval to get end of slice
        add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())

        # If input is DATE type, cast result back to DATE to preserve type
        # DuckDB converts DATE to TIMESTAMP when adding intervals
        if date_expr.is_type(exp.DType.DATE):
            return self.sql(exp.cast(add_expr, exp.DType.DATE))

        return self.sql(add_expr)

    def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
        """
        Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

        Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
        - Each bucket covers 32,768 values
        - Bucket numbering starts at 1
        - Formula: ((value - 1) // 32768) + 1 for positive values

        For non-positive values (0 and negative), we use value // 32768 to avoid
        producing bucket 0 or positive bucket IDs for negative inputs.
        """
        value = expression.this

        positive_formula = ((value - 1) // 32768) + 1
        non_positive_formula = value // 32768

        # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
        case_expr = (
            exp.case()
            .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
            .else_(non_positive_formula)
        )
        return self.sql(case_expr)

    def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
        """
        Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

        Snowflake's BITMAP_BIT_POSITION behavior:
        - For n <= 0: returns ABS(n) % 32768
        - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
        """
        this = expression.this

        return self.sql(
            exp.Mod(
                this=exp.Paren(
                    this=exp.If(
                        this=exp.GT(this=this, expression=exp.Literal.number(0)),
                        true=this - exp.Literal.number(1),
                        false=exp.Abs(this=this),
                    )
                ),
                expression=MAX_BIT_POSITION,
            )
        )
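    # Worked examples of the two methods above (derived from the formulas, not
    # from Snowflake documentation):
    #   BITMAP_BUCKET_NUMBER(1)     -> ((1 - 1) // 32768) + 1 = 1
    #   BITMAP_BUCKET_NUMBER(32769) -> ((32769 - 1) // 32768) + 1 = 2
    #   BITMAP_BIT_POSITION(1)      -> (1 - 1) % 32768 = 0
    #   BITMAP_BIT_POSITION(32768)  -> (32768 - 1) % 32768 = 32767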
    def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
        """
        Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
        Uses a pre-parsed template with placeholders replaced by expression nodes.

        Snowflake bitmap format:
        - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
        - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
        """
        arg = expression.this
        return (
            f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
        )

    def compress_sql(self, expression: exp.Compress) -> str:
        self.unsupported("DuckDB does not support the COMPRESS() function")
        return self.function_fallback_sql(expression)

    def encrypt_sql(self, expression: exp.Encrypt) -> str:
        self.unsupported("ENCRYPT is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decrypt_sql(self, expression: exp.Decrypt) -> str:
        func_name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
        self.unsupported(f"{func_name} is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
        func_name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
        self.unsupported(f"{func_name} is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
        self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def parseurl_sql(self, expression: exp.ParseUrl) -> str:
        self.unsupported("PARSE_URL is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def parseip_sql(self, expression: exp.ParseIp) -> str:
        self.unsupported("PARSE_IP is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decompressstring_sql(self, expression: exp.DecompressString) -> str:
        self.unsupported("DECOMPRESS_STRING is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decompressbinary_sql(self, expression: exp.DecompressBinary) -> str:
        self.unsupported("DECOMPRESS_BINARY is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
        this = expression.this
        expr = expression.expression

        if expression.args.get("case_insensitive"):
            this = exp.Upper(this=this)
            expr = exp.Upper(this=expr)

        result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)

        if expression.args.get("integer_scale"):
            result = exp.cast(result * 100, "INTEGER")

        return self.sql(result)

    def nthvalue_sql(self, expression: exp.NthValue) -> str:
        from_first = expression.args.get("from_first", True)
        if not from_first:
            self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end")

        return self.function_fallback_sql(expression)
    def randstr_sql(self, expression: exp.Randstr) -> str:
        """
        Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
        Uses a pre-parsed template with placeholders replaced by expression nodes.

        RANDSTR(length, generator) generates a random string of specified length.
        - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
        - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
        - No generator: Use default seed value
        """
        length = expression.this
        generator = expression.args.get("generator")

        if generator:
            if isinstance(generator, exp.Rand):
                # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
                seed_value = generator.this or generator
            else:
                # Const/int or other expression - use as seed directly
                seed_value = generator
        else:
            # No generator specified, use default seed (arbitrary but deterministic)
            seed_value = exp.Literal.number(RANDSTR_SEED)

        replacements = {"seed": seed_value, "length": length}
        return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"

    @unsupported_args("finish")
    def reduce_sql(self, expression: exp.Reduce) -> str:
        array_arg = expression.this
        initial_value = expression.args.get("initial")
        merge_lambda = expression.args.get("merge")

        if merge_lambda:
            merge_lambda.set("colon", True)

        return self.func("list_reduce", array_arg, merge_lambda, initial_value)
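    # Hedged sketch of reduce_sql's output (the lambda is rendered by
    # lambda_sql below, which switches to DuckDB's LAMBDA syntax when the
    # "colon" flag is set; exact rendering may differ slightly):
    #   REDUCE(arr, 0, (acc, x) -> acc + x)
    #   -> LIST_REDUCE(arr, LAMBDA acc, x: acc + x, 0)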
self.func("TO_BINARY", value) 2449 return f"TRY({result})" if is_safe else result 2450 2451 def tonumber_sql(self, expression: exp.ToNumber) -> str: 2452 fmt = expression.args.get("format") 2453 precision = expression.args.get("precision") 2454 scale = expression.args.get("scale") 2455 2456 if not fmt and precision and scale: 2457 return self.sql( 2458 exp.cast( 2459 expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb" 2460 ) 2461 ) 2462 2463 return super().tonumber_sql(expression) 2464 2465 def _greatest_least_sql(self, expression: exp.Greatest | exp.Least) -> str: 2466 """ 2467 Handle GREATEST/LEAST functions with dialect-aware NULL behavior. 2468 2469 - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL 2470 - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value 2471 """ 2472 # Get all arguments 2473 all_args = [expression.this, *expression.expressions] 2474 fallback_sql = self.function_fallback_sql(expression) 2475 2476 if expression.args.get("ignore_nulls"): 2477 # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs) 2478 return self.sql(fallback_sql) 2479 2480 # return NULL if any argument is NULL 2481 case_expr = exp.case().when( 2482 exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False), 2483 exp.null(), 2484 copy=False, 2485 ) 2486 case_expr.set("default", fallback_sql) 2487 return self.sql(case_expr) 2488 2489 def generator_sql(self, expression: exp.Generator) -> str: 2490 # Transpile Snowflake GENERATOR to DuckDB range() 2491 rowcount = expression.args.get("rowcount") 2492 time_limit = expression.args.get("time_limit") 2493 2494 if time_limit: 2495 self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB") 2496 2497 if not rowcount: 2498 self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB") 2499 return self.func("range", exp.Literal.number(0)) 2500 2501 return self.func("range", rowcount) 2502 2503 def greatest_sql(self, expression: exp.Greatest) -> str: 2504 return self._greatest_least_sql(expression) 2505 2506 def least_sql(self, expression: exp.Least) -> str: 2507 return self._greatest_least_sql(expression) 2508 2509 def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str: 2510 if expression.args.get("colon"): 2511 prefix = "LAMBDA " 2512 arrow_sep = ":" 2513 wrap = False 2514 else: 2515 prefix = "" 2516 2517 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 2518 return f"{prefix}{lambda_sql}" 2519 2520 def show_sql(self, expression: exp.Show) -> str: 2521 from_ = self.sql(expression, "from_") 2522 from_ = f" FROM {from_}" if from_ else "" 2523 return f"SHOW {expression.name}{from_}" 2524 2525 def soundex_sql(self, expression: exp.Soundex) -> str: 2526 self.unsupported("SOUNDEX is not supported in DuckDB") 2527 return self.func("SOUNDEX", expression.this) 2528 2529 def sortarray_sql(self, expression: exp.SortArray) -> str: 2530 arr = expression.this 2531 asc = expression.args.get("asc") 2532 nulls_first = expression.args.get("nulls_first") 2533 2534 if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean): 2535 return self.func("LIST_SORT", arr, asc, nulls_first) 2536 2537 nulls_are_first = nulls_first == exp.true() 2538 nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None 2539 2540 if not isinstance(asc, exp.Boolean): 2541 return self.func("LIST_SORT", arr, asc, nulls_first_sql) 2542 2543 descending = 
    def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
        if expression.args.get("colon"):
            prefix = "LAMBDA "
            arrow_sep = ":"
            wrap = False
        else:
            prefix = ""

        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
        return f"{prefix}{lambda_sql}"

    def show_sql(self, expression: exp.Show) -> str:
        from_ = self.sql(expression, "from_")
        from_ = f" FROM {from_}" if from_ else ""
        return f"SHOW {expression.name}{from_}"

    def soundex_sql(self, expression: exp.Soundex) -> str:
        self.unsupported("SOUNDEX is not supported in DuckDB")
        return self.func("SOUNDEX", expression.this)

    def sortarray_sql(self, expression: exp.SortArray) -> str:
        arr = expression.this
        asc = expression.args.get("asc")
        nulls_first = expression.args.get("nulls_first")

        if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first)

        nulls_are_first = nulls_first == exp.true()
        nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

        if not isinstance(asc, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first_sql)

        descending = asc == exp.false()

        if not descending and not nulls_are_first:
            return self.func("LIST_SORT", arr)
        if not nulls_are_first:
            return self.func("ARRAY_REVERSE_SORT", arr)
        return self.func(
            "LIST_SORT",
            arr,
            exp.Literal.string("DESC" if descending else "ASC"),
            exp.Literal.string("NULLS FIRST"),
        )

    def install_sql(self, expression: exp.Install) -> str:
        force = "FORCE " if expression.args.get("force") else ""
        this = self.sql(expression, "this")
        from_clause = expression.args.get("from_")
        from_clause = f" FROM {from_clause}" if from_clause else ""
        return f"{force}INSTALL {this}{from_clause}"

    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
        self.unsupported(
            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types."
        )
        return self.function_fallback_sql(expression)

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))

    def strposition_sql(self, expression: exp.StrPosition) -> str:
        this = expression.this
        substr = expression.args.get("substr")
        position = expression.args.get("position")

        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types
        # Convert to HEX strings, use STRPOS, then convert hex position to byte position
        if _is_binary(this):
            # Build expression: STRPOS(HEX(haystack), HEX(needle))
            hex_strpos = exp.StrPosition(
                this=exp.Hex(this=this),
                substr=exp.Hex(this=substr),
            )

            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

        # For VARCHAR: handle clamp_position
        if expression.args.get("clamp_position") and position:
            expression = expression.copy()
            expression.set(
                "position",
                exp.If(
                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                    true=exp.Literal.number(1),
                    false=position.copy(),
                ),
            )

        return strposition_sql(self, expression)

    def substring_sql(self, expression: exp.Substring) -> str:
        if expression.args.get("zero_start"):
            if start := expression.args.get("start"):
                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
            if length := expression.args.get("length"):
                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)

            return self.func("SUBSTRING", expression.this, start, length)

        return self.function_fallback_sql(expression)

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DType.TIMESTAMPLTZ,
            exp.DType.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
                )
            )
        return base_sql

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        formatted_time = self.format_time(expression)
        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
        return self.sql(
            exp.cast(
                self.func(function_name, expression.this, formatted_time),
                exp.DataType(this=exp.DType.DATE),
            )
        )

    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
        this = expression.this
        time_format = self.format_time(expression)
        safe = expression.args.get("safe")
        time_type = exp.DataType.from_str("TIME", dialect="duckdb")
        cast_expr = exp.TryCast if safe else exp.Cast

        if time_format:
            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
            return self.sql(cast_expr(this=strptime, to=time_type))

        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
            return self.sql(this)

        return self.sql(cast_expr(this=this, to=time_type))

    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        if not expression.this:
            return "CURRENT_DATE"

        expr = exp.Cast(
            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
            to=exp.DataType(this=exp.DType.DATE),
        )
        return self.sql(expr)

    def checkjson_sql(self, expression: exp.CheckJson) -> str:
        arg = expression.this
        return self.sql(
            exp.case()
            .when(
                exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)),
                exp.null(),
            )
            .else_(exp.Literal.string("Invalid JSON"))
        )

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(
                exp.case()
                .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
                .else_(exp.null())
            )
        return self.func("JSON", arg)
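    # Hedged example of the safe path above:
    #   TRY_PARSE_JSON(x) -> CASE WHEN JSON_VALID(x) THEN CAST(x AS JSON) ELSE NULL END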
2732 """ 2733 mean = expression.this 2734 stddev = expression.args["stddev"] 2735 gen: exp.Expr = expression.args["gen"] 2736 2737 # Build two uniform random values [0, 1) for Box-Muller transform 2738 if isinstance(gen, exp.Rand) and gen.this is None: 2739 u1: exp.Expr = exp.Rand() 2740 u2: exp.Expr = exp.Rand() 2741 else: 2742 # Seeded: derive two values using HASH with different inputs 2743 seed = gen.this if isinstance(gen, exp.Rand) else gen 2744 u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed) 2745 u2 = exp.replace_placeholders( 2746 self.SEEDED_RANDOM_TEMPLATE, 2747 seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)), 2748 ) 2749 2750 replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2} 2751 return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements)) 2752 2753 def uniform_sql(self, expression: exp.Uniform) -> str: 2754 """ 2755 Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB. 2756 2757 UNIFORM returns a random value in [min, max]: 2758 - Integer result if both min and max are integers 2759 - Float result if either min or max is a float 2760 """ 2761 min_val = expression.this 2762 max_val = expression.expression 2763 gen = expression.args.get("gen") 2764 2765 # Determine if result should be integer (both bounds are integers). 2766 # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT 2767 is_int_result = min_val.is_int and max_val.is_int 2768 2769 # Build the random value expression [0, 1) 2770 if not isinstance(gen, exp.Rand): 2771 # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0 2772 random_expr: exp.Expr = exp.Div( 2773 this=exp.Paren( 2774 this=exp.Mod( 2775 this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])), 2776 expression=exp.Literal.number(1000000), 2777 ) 2778 ), 2779 expression=exp.Literal.number(1000000.0), 2780 ) 2781 else: 2782 random_expr = exp.Rand() 2783 2784 # Build: min + random * (max - min [+ 1 for int]) 2785 range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val) 2786 if is_int_result: 2787 range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1)) 2788 2789 result: exp.Expr = exp.Add( 2790 this=min_val, 2791 expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)), 2792 ) 2793 2794 if is_int_result: 2795 result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr()) 2796 2797 return self.sql(result) 2798 2799 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 2800 nano = expression.args.get("nano") 2801 overflow = expression.args.get("overflow") 2802 2803 # Snowflake's TIME_FROM_PARTS supports overflow 2804 if overflow: 2805 hour = expression.args["hour"] 2806 minute = expression.args["min"] 2807 sec = expression.args["sec"] 2808 2809 # Check if values are within normal ranges - use MAKE_TIME for efficiency 2810 if not nano and all(arg.is_int for arg in [hour, minute, sec]): 2811 try: 2812 h_val = hour.to_py() 2813 m_val = minute.to_py() 2814 s_val = sec.to_py() 2815 if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59: 2816 return rename_func("MAKE_TIME")(self, expression) 2817 except ValueError: 2818 pass 2819 2820 # Overflow or nanoseconds detected - use INTERVAL arithmetic 2821 if nano: 2822 sec = sec + nano.pop() / exp.Literal.number(1000000000.0) 2823 2824 total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec 2825 2826 return self.sql( 2827 exp.Add( 2828 this=exp.Cast( 2829 this=exp.Literal.string("00:00:00"), 
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME
        if nano:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies server timezone while DuckDB uses local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
            # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.from_str(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.from_str(cast_type, dialect="duckdb"),
                )
            )

        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns float, cast to BIGINT for integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.from_str("BIGINT", dialect="duckdb"))
            return self.sql(result)

        return super().extract_sql(expression)
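    # Hedged examples of the epoch path above:
    #   EXTRACT(EPOCH_SECOND FROM ts)     -> CAST(EPOCH(ts) AS BIGINT)
    #   EXTRACT(EPOCH_NANOSECOND FROM ts) -> EPOCH_NS(ts)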
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    @unsupported_args("nano")
    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
        if nano := expression.args.get("nano"):
            nano.pop()

        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
        return f"CAST({timestamp} AS TIMESTAMPTZ)"

    @unsupported_args("nano")
    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
        # Extract zone before popping
        zone = expression.args.get("zone")
        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
        if zone:
            zone = zone.pop()

        if nano := expression.args.get("nano"):
            nano.pop()

        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)

        if zone:
            # Use AT TIME ZONE to apply the explicit timezone
            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"

        return timestamp

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: str | None = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            not expression.args.get("using")
            and not expression.args.get("on")
            and not expression.method
            and (expression.kind in ("", "INNER", "OUTER"))
        ):
            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            if isinstance(expression.this, exp.Unnest):
                return super().join_sql(expression.on(exp.true()))

            expression.set("side", None)
            expression.set("kind", None)

        return super().join_sql(expression)

    def countif_sql(self, expression: exp.CountIf) -> str:
        if self.dialect.version >= (1, 2):
            return self.function_fallback_sql(expression)

        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)
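    # Hedged note on the version gate above: when targeting DuckDB >= 1.2,
    # COUNT_IF(cond) passes through natively; older targets are lowered to a
    # SUM over a CASE expression via count_if_to_sum.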
    def bracket_sql(self, expression: exp.Bracket) -> str:
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)

    def bitlength_sql(self, expression: exp.BitLength) -> str:
        if not _is_binary(arg := expression.this):
            return self.func("BIT_LENGTH", arg)

        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))
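    # Hedged example of the binary path above: BIT_LENGTH over a BLOB argument
    # is computed as the value's byte length times 8, after a VARBINARY cast to
    # satisfy DuckDB's static type checking.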
return self.func("CHR", arg) 3111 3112 def collation_sql(self, expression: exp.Collation) -> str: 3113 self.unsupported("COLLATION function is not supported by DuckDB") 3114 return self.function_fallback_sql(expression) 3115 3116 def collate_sql(self, expression: exp.Collate) -> str: 3117 if not expression.expression.is_string: 3118 return super().collate_sql(expression) 3119 3120 raw = expression.expression.name 3121 if not raw: 3122 return self.sql(expression.this) 3123 3124 parts = [] 3125 for part in raw.split("-"): 3126 lower = part.lower() 3127 if lower not in _SNOWFLAKE_COLLATION_DEFAULTS: 3128 if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED: 3129 self.unsupported( 3130 f"Snowflake collation specifier '{part}' has no DuckDB equivalent" 3131 ) 3132 parts.append(lower) 3133 3134 if not parts: 3135 return self.sql(expression.this) 3136 return super().collate_sql( 3137 exp.Collate(this=expression.this, expression=exp.var(".".join(parts))) 3138 ) 3139 3140 def _validate_regexp_flags(self, flags: exp.Expr | None, supported_flags: str) -> str | None: 3141 """ 3142 Validate and filter regexp flags for DuckDB compatibility. 3143 3144 Args: 3145 flags: The flags expression to validate 3146 supported_flags: String of supported flags (e.g., "ims", "cims"). 3147 Only these flags will be returned. 3148 3149 Returns: 3150 Validated/filtered flag string, or None if no valid flags remain 3151 """ 3152 if not isinstance(flags, exp.Expr): 3153 return None 3154 3155 if not flags.is_string: 3156 self.unsupported("Non-literal regexp flags are not fully supported in DuckDB") 3157 return None 3158 3159 flag_str = flags.this 3160 unsupported = set(flag_str) - set(supported_flags) 3161 3162 if unsupported: 3163 self.unsupported( 3164 f"Regexp flags {sorted(unsupported)} are not supported in this context" 3165 ) 3166 3167 flag_str = "".join(f for f in flag_str if f in supported_flags) 3168 return flag_str if flag_str else None 3169 3170 def regexpcount_sql(self, expression: exp.RegexpCount) -> str: 3171 this = expression.this 3172 pattern = expression.expression 3173 position = expression.args.get("position") 3174 parameters = expression.args.get("parameters") 3175 3176 # Validate flags - only "ims" flags are supported for embedded patterns 3177 validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims") 3178 3179 if position: 3180 this = exp.Substring(this=this, start=position) 3181 3182 # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument) 3183 if validated_flags: 3184 pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern]) 3185 3186 # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character 3187 result = ( 3188 exp.case() 3189 .when( 3190 exp.EQ(this=pattern, expression=exp.Literal.string("")), 3191 exp.Literal.number(0), 3192 ) 3193 .else_( 3194 exp.Length( 3195 this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]) 3196 ) 3197 ) 3198 ) 3199 3200 return self.sql(result) 3201 3202 def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str: 3203 subject = expression.this 3204 pattern = expression.expression 3205 replacement = expression.args.get("replacement") or exp.Literal.string("") 3206 position = expression.args.get("position") 3207 occurrence = expression.args.get("occurrence") 3208 modifiers = expression.args.get("modifiers") 3209 3210 validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or "" 3211 3212 # Handle occurrence (only literals 
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # Flag DuckDB to replace either all occurrences or none; the
            # single_replace check preserves DuckDB round trips
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            pos = position.to_py()
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)

    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
        this = expression.this
        pattern = expression.expression
        flag = expression.args.get("flag")

        if expression.args.get("full_match"):
            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
            flag = exp.Literal.string(validated_flags) if validated_flags else None
            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)

        return self.func("REGEXP_MATCHES", this, pattern, flag)

    @unsupported_args("ins_cost", "del_cost", "sub_cost")
    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
        this = expression.this
        expr = expression.expression
        max_dist = expression.args.get("max_dist")

        if max_dist is None:
            return self.func("LEVENSHTEIN", this, expr)

        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
        levenshtein = exp.Levenshtein(this=this, expression=expr)
        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
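    # Hedged example of the max_dist emulation above:
    #   LEVENSHTEIN(a, b, 3) -> LEAST(LEVENSHTEIN(a, b), 3)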
3281 3282 For VARCHAR: Delegate to parent class 3283 For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input))) 3284 """ 3285 string_arg = expression.this 3286 fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ") 3287 3288 if _is_binary(string_arg) or _is_binary(fill_arg): 3289 length_arg = expression.expression 3290 is_left = expression.args.get("is_left") 3291 3292 input_len = exp.ByteLength(this=string_arg) 3293 chars_needed = length_arg - input_len 3294 pad_count = exp.Greatest( 3295 this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True 3296 ) 3297 repeat_expr = exp.Repeat(this=fill_arg, times=pad_count) 3298 3299 left, right = string_arg, repeat_expr 3300 if is_left: 3301 left, right = right, left 3302 3303 result = exp.DPipe(this=left, expression=right) 3304 return self.sql(result) 3305 3306 # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED) 3307 return super().pad_sql(expression) 3308 3309 def minhash_sql(self, expression: exp.Minhash) -> str: 3310 k = expression.this 3311 exprs = expression.expressions 3312 3313 if len(exprs) != 1 or isinstance(exprs[0], exp.Star): 3314 self.unsupported( 3315 "MINHASH with multiple expressions or * requires manual query restructuring" 3316 ) 3317 return self.func("MINHASH", k, *exprs) 3318 3319 expr = exprs[0] 3320 result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k) 3321 return f"({self.sql(result)})" 3322 3323 def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str: 3324 expr = expression.this 3325 result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr) 3326 return f"({self.sql(result)})" 3327 3328 def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str: 3329 expr = expression.this 3330 result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr) 3331 return f"({self.sql(result)})" 3332 3333 def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str: 3334 return self.sql( 3335 exp.Filter( 3336 this=exp.func("LIST", exp.Distinct(expressions=[expression.this])), 3337 expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()), 3338 ) 3339 ) 3340 3341 def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str: 3342 self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB") 3343 return self.function_fallback_sql(expression) 3344 3345 def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str: 3346 arr = expression.this 3347 func = self.func("LIST_DISTINCT", arr) 3348 3349 if expression.args.get("check_null"): 3350 add_null_to_array = exp.func( 3351 "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null() 3352 ) 3353 return self.sql( 3354 exp.If( 3355 this=exp.NEQ( 3356 this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr) 3357 ), 3358 true=add_null_to_array, 3359 false=func, 3360 ) 3361 ) 3362 3363 return func 3364 3365 def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str: 3366 if expression.args.get("is_multiset") and len(expression.expressions) == 2: 3367 return self._array_bag_sql( 3368 self.ARRAY_INTERSECTION_CONDITION, 3369 expression.expressions[0], 3370 expression.expressions[1], 3371 ) 3372 return self.function_fallback_sql(expression) 3373 3374 def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str: 3375 arr1, arr2 = expression.this, expression.expression 3376 if expression.args.get("is_multiset"): 3377 return 
self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2) 3378 return self.sql( 3379 exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2) 3380 ) 3381 3382 def arrayslice_sql(self, expression: exp.ArraySlice) -> str: 3383 """ 3384 Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's 3385 ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE 3386 expressions that adjust the index at query time: 3387 - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END 3388 - end: CASE WHEN end < 0 THEN end - 1 ELSE end END 3389 """ 3390 start, end = expression.args.get("start"), expression.args.get("end") 3391 3392 if expression.args.get("zero_based"): 3393 if start is not None: 3394 start = ( 3395 exp.case() 3396 .when( 3397 exp.GTE(this=start.copy(), expression=exp.Literal.number(0)), 3398 exp.Add(this=start.copy(), expression=exp.Literal.number(1)), 3399 ) 3400 .else_(start) 3401 ) 3402 if end is not None: 3403 end = ( 3404 exp.case() 3405 .when( 3406 exp.LT(this=end.copy(), expression=exp.Literal.number(0)), 3407 exp.Sub(this=end.copy(), expression=exp.Literal.number(1)), 3408 ) 3409 .else_(end) 3410 ) 3411 3412 return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step")) 3413 3414 def arrayszip_sql(self, expression: exp.ArraysZip) -> str: 3415 args = expression.expressions 3416 3417 if not args: 3418 # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs 3419 return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array()))) 3420 3421 # Build placeholder values for template 3422 lengths = [exp.Length(this=arg) for arg in args] 3423 max_len = ( 3424 lengths[0] 3425 if len(lengths) == 1 3426 else exp.Greatest(this=lengths[0], expressions=lengths[1:]) 3427 ) 3428 3429 # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...} 3430 empty_struct = exp.func( 3431 "STRUCT", 3432 *[ 3433 exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null()) 3434 for i in range(len(args)) 3435 ], 3436 ) 3437 3438 # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...} 3439 # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax 3440 index = exp.column("__i") + 1 3441 transform_struct = exp.func( 3442 "STRUCT", 3443 *[ 3444 exp.PropertyEQ( 3445 this=exp.Literal.string(f"${i + 1}"), 3446 expression=exp.func("COALESCE", arg, exp.array())[index], 3447 ) 3448 for i, arg in enumerate(args) 3449 ], 3450 ) 3451 3452 result = exp.replace_placeholders( 3453 self.ARRAYS_ZIP_TEMPLATE.copy(), 3454 null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]), 3455 all_empty_check=exp.and_( 3456 *[ 3457 exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0)) 3458 for arg in args 3459 ] 3460 ), 3461 empty_struct=empty_struct, 3462 max_len=max_len, 3463 transform_struct=transform_struct, 3464 ) 3465 return self.sql(result) 3466 3467 def lower_sql(self, expression: exp.Lower) -> str: 3468 result_sql = self.func("LOWER", _cast_to_varchar(expression.this)) 3469 return _gen_with_cast_to_blob(self, expression, result_sql) 3470 3471 def upper_sql(self, expression: exp.Upper) -> str: 3472 result_sql = self.func("UPPER", _cast_to_varchar(expression.this)) 3473 return _gen_with_cast_to_blob(self, expression, result_sql) 3474 3475 def reverse_sql(self, expression: exp.Reverse) -> str: 3476 result_sql = self.func("REVERSE", _cast_to_varchar(expression.this)) 3477 return _gen_with_cast_to_blob(self, expression, result_sql) 3478 3479 def _left_right_sql(self, 
expression: exp.Left | exp.Right, func_name: str) -> str: 3480 arg = expression.this 3481 length = expression.expression 3482 is_binary = _is_binary(arg) 3483 3484 if is_binary: 3485 # LEFT/RIGHT(blob, n) becomes UNHEX(LEFT/RIGHT(HEX(blob), n * 2)) 3486 # Each byte becomes 2 hex chars, so multiply length by 2 3487 hex_arg = exp.Hex(this=arg) 3488 hex_length = exp.Mul(this=length, expression=exp.Literal.number(2)) 3489 result: exp.Expression = exp.Unhex( 3490 this=exp.Anonymous(this=func_name, expressions=[hex_arg, hex_length]) 3491 ) 3492 else: 3493 result = exp.Anonymous(this=func_name, expressions=[arg, length]) 3494 3495 if expression.args.get("negative_length_returns_empty"): 3496 empty: exp.Expression = exp.Literal.string("") 3497 if is_binary: 3498 empty = exp.Unhex(this=empty) 3499 result = exp.case().when(length < exp.Literal.number(0), empty).else_(result) 3500 3501 return self.sql(result) 3502 3503 def left_sql(self, expression: exp.Left) -> str: 3504 return self._left_right_sql(expression, "LEFT") 3505 3506 def right_sql(self, expression: exp.Right) -> str: 3507 return self._left_right_sql(expression, "RIGHT") 3508 3509 def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str: 3510 return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING")) 3511 3512 def stuff_sql(self, expression: exp.Stuff) -> str: 3513 base = expression.this 3514 start = expression.args["start"] 3515 length = expression.args["length"] 3516 insertion = expression.expression 3517 is_binary = _is_binary(base) 3518 3519 if is_binary: 3520 # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead 3521 # (each byte = 2 hex chars), then UNHEX back to BLOB 3522 base = exp.Hex(this=base) 3523 insertion = exp.Hex(this=insertion) 3524 left = exp.Substring( 3525 this=base.copy(), 3526 start=exp.Literal.number(1), 3527 length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2), 3528 ) 3529 right = exp.Substring( 3530 this=base.copy(), 3531 start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2) 3532 + exp.Literal.number(1), 3533 ) 3534 else: 3535 left = exp.Substring( 3536 this=base.copy(), 3537 start=exp.Literal.number(1), 3538 length=start.copy() - exp.Literal.number(1), 3539 ) 3540 right = exp.Substring(this=base.copy(), start=start + length) 3541 result: exp.Expr = exp.DPipe( 3542 this=exp.DPipe(this=left, expression=insertion), expression=right 3543 ) 3544 3545 if is_binary: 3546 result = exp.Unhex(this=result) 3547 3548 return self.sql(result) 3549 3550 def rand_sql(self, expression: exp.Rand) -> str: 3551 seed = expression.this 3552 if seed is not None: 3553 self.unsupported("RANDOM with seed is not supported in DuckDB") 3554 3555 lower = expression.args.get("lower") 3556 upper = expression.args.get("upper") 3557 3558 if lower and upper: 3559 # scale DuckDB's [0,1) to the specified range 3560 range_size = exp.paren(upper - lower) 3561 scaled = exp.Add(this=lower, expression=exp.func("random") * range_size) 3562 3563 # For now we assume that if bounds are set, return type is BIGINT. 
Snowflake/Teradata 3564 result = exp.cast(scaled, exp.DType.BIGINT) 3565 return self.sql(result) 3566 3567 # Default DuckDB behavior - just return RANDOM() as float 3568 return "RANDOM()" 3569 3570 def bytelength_sql(self, expression: exp.ByteLength) -> str: 3571 arg = expression.this 3572 3573 # Check if it's a text type (handles both literals and annotated expressions) 3574 if arg.is_type(*exp.DataType.TEXT_TYPES): 3575 return self.func("OCTET_LENGTH", exp.Encode(this=arg)) 3576 3577 # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated) 3578 return self.func("OCTET_LENGTH", arg) 3579 3580 def base64encode_sql(self, expression: exp.Base64Encode) -> str: 3581 # DuckDB TO_BASE64 requires BLOB input 3582 # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly 3583 # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type. 3584 result = expression.this 3585 3586 # Check if input is a string type - ENCODE only accepts VARCHAR 3587 if result.is_type(*exp.DataType.TEXT_TYPES): 3588 result = exp.Encode(this=result) 3589 3590 result = exp.ToBase64(this=result) 3591 3592 max_line_length = expression.args.get("max_line_length") 3593 alphabet = expression.args.get("alphabet") 3594 3595 # Handle custom alphabet by replacing standard chars with custom ones 3596 result = _apply_base64_alphabet_replacements(result, alphabet) 3597 3598 # Handle max_line_length by inserting newlines every N characters 3599 line_length = ( 3600 t.cast(int, max_line_length.to_py()) 3601 if isinstance(max_line_length, exp.Literal) and max_line_length.is_number 3602 else 0 3603 ) 3604 if line_length > 0: 3605 newline = exp.Chr(expressions=[exp.Literal.number(10)]) 3606 result = exp.Trim( 3607 this=exp.RegexpReplace( 3608 this=result, 3609 expression=exp.Literal.string(f"(.{{{line_length}}})"), 3610 replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]), 3611 ), 3612 expression=newline, 3613 position="TRAILING", 3614 ) 3615 3616 return self.sql(result) 3617 3618 def hex_sql(self, expression: exp.Hex) -> str: 3619 case = expression.args.get("case") 3620 3621 if not case: 3622 return self.func("HEX", expression.this) 3623 3624 hex_expr = exp.Hex(this=expression.this) 3625 return self.sql( 3626 exp.case() 3627 .when(case.is_(exp.null()), exp.null()) 3628 .when(case.copy().eq(0), exp.Lower(this=hex_expr.copy())) 3629 .else_(hex_expr) 3630 ) 3631 3632 def replace_sql(self, expression: exp.Replace) -> str: 3633 result_sql = self.func( 3634 "REPLACE", 3635 _cast_to_varchar(expression.this), 3636 _cast_to_varchar(expression.expression), 3637 _cast_to_varchar(expression.args.get("replacement")), 3638 ) 3639 return _gen_with_cast_to_blob(self, expression, result_sql) 3640 3641 def _bitwise_op(self, expression: exp.Binary, op: str) -> str: 3642 _prepare_binary_bitwise_args(expression) 3643 result_sql = self.binary(expression, op) 3644 return _gen_with_cast_to_blob(self, expression, result_sql) 3645 3646 def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str: 3647 _prepare_binary_bitwise_args(expression) 3648 result_sql = self.func("XOR", expression.this, expression.expression) 3649 return _gen_with_cast_to_blob(self, expression, result_sql) 3650 3651 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 3652 this = expression.this 3653 key = expression.args.get("key") 3654 key_sql = key.name if isinstance(key, exp.Expr) else "" 3655 value_sql = self.sql(expression, "value") 3656 3657 kv_sql = f"{key_sql} := {value_sql}" 
3658 3659 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 3660 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 3661 if isinstance(this, exp.Struct) and not this.expressions: 3662 return self.func("STRUCT_PACK", kv_sql) 3663 3664 return self.func("STRUCT_INSERT", this, kv_sql) 3665 3666 def mapcat_sql(self, expression: exp.MapCat) -> str: 3667 result = exp.replace_placeholders( 3668 self.MAPCAT_TEMPLATE.copy(), 3669 map1=expression.this, 3670 map2=expression.expression, 3671 ) 3672 return self.sql(result) 3673 3674 def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str: 3675 return self.func( 3676 "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this 3677 ) 3678 3679 def mapdelete_sql(self, expression: exp.MapDelete) -> str: 3680 map_arg = expression.this 3681 keys_to_delete = expression.expressions 3682 3683 x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key")) 3684 3685 lambda_expr = exp.Lambda( 3686 this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(), 3687 expressions=[exp.to_identifier("x")], 3688 ) 3689 result = exp.func( 3690 "MAP_FROM_ENTRIES", 3691 exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr), 3692 ) 3693 return self.sql(result) 3694 3695 def mappick_sql(self, expression: exp.MapPick) -> str: 3696 map_arg = expression.this 3697 keys_to_pick = expression.expressions 3698 3699 x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key")) 3700 3701 if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY): 3702 lambda_expr = exp.Lambda( 3703 this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key), 3704 expressions=[exp.to_identifier("x")], 3705 ) 3706 else: 3707 lambda_expr = exp.Lambda( 3708 this=exp.In(this=x_dot_key, expressions=keys_to_pick), 3709 expressions=[exp.to_identifier("x")], 3710 ) 3711 3712 result = exp.func( 3713 "MAP_FROM_ENTRIES", 3714 exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr), 3715 ) 3716 return self.sql(result) 3717 3718 def mapsize_sql(self, expression: exp.MapSize) -> str: 3719 return self.func("CARDINALITY", expression.this) 3720 3721 @unsupported_args("update_flag") 3722 def mapinsert_sql(self, expression: exp.MapInsert) -> str: 3723 map_arg = expression.this 3724 key = expression.args.get("key") 3725 value = expression.args.get("value") 3726 3727 map_type = map_arg.type 3728 3729 if value is not None: 3730 if map_type and map_type.expressions and len(map_type.expressions) > 1: 3731 # Extract the value type from MAP(key_type, value_type) 3732 value_type = map_type.expressions[1] 3733 # Cast value to match the map's value type to avoid type conflicts 3734 value = exp.cast(value, value_type) 3735 # else: polymorphic MAP case - no type parameters available, use value as-is 3736 3737 # Create a single-entry map for the new key-value pair 3738 new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)]) 3739 new_entry: exp.Expression = exp.ToMap(this=new_entry_struct) 3740 3741 # Use MAP_CONCAT to merge the original map with the new entry 3742 # This automatically handles both insert and update cases 3743 result = exp.func("MAP_CONCAT", map_arg, new_entry) 3744 3745 return self.sql(result) 3746 3747 def startswith_sql(self, expression: exp.StartsWith) -> str: 3748 return self.func( 3749 "STARTS_WITH", 3750 
_cast_to_varchar(expression.this), 3751 _cast_to_varchar(expression.expression), 3752 ) 3753 3754 def space_sql(self, expression: exp.Space) -> str: 3755 # DuckDB's REPEAT requires BIGINT for the count parameter 3756 return self.sql( 3757 exp.Repeat( 3758 this=exp.Literal.string(" "), 3759 times=exp.cast(expression.this, exp.DType.BIGINT), 3760 ) 3761 ) 3762 3763 def tablefromrows_sql(self, expression: exp.TableFromRows) -> str: 3764 # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE) 3765 if isinstance(expression.this, exp.Generator): 3766 # Preserve alias, joins, and other table-level args 3767 table = exp.Table( 3768 this=expression.this, 3769 alias=expression.args.get("alias"), 3770 joins=expression.args.get("joins"), 3771 ) 3772 return self.sql(table) 3773 3774 return super().tablefromrows_sql(expression) 3775 3776 def unnest_sql(self, expression: exp.Unnest) -> str: 3777 explode_array = expression.args.get("explode_array") 3778 if explode_array: 3779 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 3780 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 3781 expression.expressions.append( 3782 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 3783 ) 3784 3785 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 3786 alias = expression.args.get("alias") 3787 if isinstance(alias, exp.TableAlias): 3788 expression.set("alias", None) 3789 if alias.columns: 3790 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 3791 3792 unnest_sql = super().unnest_sql(expression) 3793 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 3794 return self.sql(select) 3795 3796 return super().unnest_sql(expression) 3797 3798 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 3799 this = expression.this 3800 3801 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 3802 # DuckDB should render IGNORE NULLS only for the general-purpose 3803 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 
3804 return super().ignorenulls_sql(expression) 3805 3806 if isinstance(this, exp.First): 3807 this = exp.AnyValue(this=this.this) 3808 3809 if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)): 3810 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 3811 3812 return self.sql(this) 3813 3814 def split_sql(self, expression: exp.Split) -> str: 3815 base_func = exp.func("STR_SPLIT", expression.this, expression.expression) 3816 3817 case_expr = exp.case().else_(base_func) 3818 needs_case = False 3819 3820 if expression.args.get("null_returns_null"): 3821 case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null()) 3822 needs_case = True 3823 3824 if expression.args.get("empty_delimiter_returns_whole"): 3825 # When delimiter is empty string, return input string as single array element 3826 array_with_input = exp.array(expression.this) 3827 case_expr = case_expr.when( 3828 expression.expression.eq(exp.Literal.string("")), array_with_input 3829 ) 3830 needs_case = True 3831 3832 return self.sql(case_expr if needs_case else base_func) 3833 3834 def splitpart_sql(self, expression: exp.SplitPart) -> str: 3835 string_arg = expression.this 3836 delimiter_arg = expression.args.get("delimiter") 3837 part_index_arg = expression.args.get("part_index") 3838 3839 if delimiter_arg and part_index_arg: 3840 # Handle Snowflake's "index 0 and 1 both return first element" behavior 3841 if expression.args.get("part_index_zero_as_one"): 3842 # Convert 0 to 1 for compatibility 3843 3844 part_index_arg = exp.Paren( 3845 this=exp.case() 3846 .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1")) 3847 .else_(part_index_arg) 3848 ) 3849 3850 # Use Anonymous to avoid recursion 3851 base_func_expr: exp.Expr = exp.Anonymous( 3852 this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg] 3853 ) 3854 needs_case_transform = False 3855 case_expr = exp.case().else_(base_func_expr) 3856 3857 if expression.args.get("empty_delimiter_returns_whole"): 3858 # When delimiter is empty string: 3859 # - Return whole string if part_index is 1 or -1 3860 # - Return empty string otherwise 3861 empty_case = exp.Paren( 3862 this=exp.case() 3863 .when( 3864 exp.or_( 3865 part_index_arg.eq(exp.Literal.number("1")), 3866 part_index_arg.eq(exp.Literal.number("-1")), 3867 ), 3868 string_arg, 3869 ) 3870 .else_(exp.Literal.string("")) 3871 ) 3872 3873 case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case) 3874 needs_case_transform = True 3875 3876 """ 3877 Output looks something like this: 3878 3879 CASE 3880 WHEN delimiter is '' THEN 3881 ( 3882 CASE 3883 WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input 3884 ELSE '' END 3885 ) 3886 ELSE SPLIT_PART(input, delimiter, adjusted_part_index) 3887 END 3888 3889 """ 3890 return self.sql(case_expr if needs_case_transform else base_func_expr) 3891 3892 return self.function_fallback_sql(expression) 3893 3894 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 3895 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 3896 # DuckDB should render RESPECT NULLS only for the general-purpose 3897 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
3898 return super().respectnulls_sql(expression) 3899 3900 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 3901 return self.sql(expression, "this") 3902 3903 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 3904 null = expression.args.get("null") 3905 3906 if expression.args.get("null_is_empty"): 3907 x = exp.to_identifier("x") 3908 list_transform = exp.Transform( 3909 this=expression.this.copy(), 3910 expression=exp.Lambda( 3911 this=exp.Coalesce( 3912 this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")] 3913 ), 3914 expressions=[x], 3915 ), 3916 ) 3917 array_to_string = exp.ArrayToString( 3918 this=list_transform, expression=expression.expression 3919 ) 3920 if expression.args.get("null_delim_is_null"): 3921 return self.sql( 3922 exp.case() 3923 .when(expression.expression.copy().is_(exp.null()), exp.null()) 3924 .else_(array_to_string) 3925 ) 3926 return self.sql(array_to_string) 3927 3928 if null: 3929 x = exp.to_identifier("x") 3930 return self.sql( 3931 exp.ArrayToString( 3932 this=exp.Transform( 3933 this=expression.this, 3934 expression=exp.Lambda( 3935 this=exp.Coalesce(this=x, expressions=[null]), 3936 expressions=[x], 3937 ), 3938 ), 3939 expression=expression.expression, 3940 ) 3941 ) 3942 3943 return self.func("ARRAY_TO_STRING", expression.this, expression.expression) 3944 3945 def concatws_sql(self, expression: exp.ConcatWs) -> str: 3946 # DuckDB-specific: handle binary types using DPipe (||) operator 3947 separator = seq_get(expression.expressions, 0) 3948 args = expression.expressions[1:] 3949 3950 if any(_is_binary(arg) for arg in [separator, *args]): 3951 result = args[0] 3952 for arg in args[1:]: 3953 result = exp.DPipe( 3954 this=exp.DPipe(this=result, expression=separator), expression=arg 3955 ) 3956 return self.sql(result) 3957 3958 return super().concatws_sql(expression) 3959 3960 def _regexp_extract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 3961 this = expression.this 3962 group = expression.args.get("group") 3963 params = expression.args.get("parameters") 3964 position = expression.args.get("position") 3965 occurrence = expression.args.get("occurrence") 3966 null_if_pos_overflow = expression.args.get("null_if_pos_overflow") 3967 3968 # Handle Snowflake's 'e' flag: it enables capture group extraction 3969 # In DuckDB, this is controlled by the group parameter directly 3970 if params and params.is_string and "e" in params.name: 3971 params = exp.Literal.string(params.name.replace("e", "")) 3972 3973 validated_flags = self._validate_regexp_flags(params, supported_flags="cims") 3974 3975 # Strip default group when no following params (DuckDB default is same as group=0) 3976 if ( 3977 not validated_flags 3978 and group 3979 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 3980 ): 3981 group = None 3982 3983 flags_expr = exp.Literal.string(validated_flags) if validated_flags else None 3984 3985 # use substring to handle position argument 3986 if position and (not position.is_int or position.to_py() > 1): 3987 this = exp.Substring(this=this, start=position) 3988 3989 if null_if_pos_overflow: 3990 this = exp.Nullif(this=this, expression=exp.Literal.string("")) 3991 3992 is_extract_all = isinstance(expression, exp.RegexpExtractAll) 3993 non_single_occurrence = occurrence and (not occurrence.is_int or occurrence.to_py() > 1) 3994 3995 if is_extract_all or non_single_occurrence: 3996 name = "REGEXP_EXTRACT_ALL" 3997 else: 3998 name = "REGEXP_EXTRACT" 3999 4000 result: 
exp.Expr = exp.Anonymous( 4001 this=name, expressions=[this, expression.expression, group, flags_expr] 4002 ) 4003 4004 # Array slicing for REGEXP_EXTRACT_ALL with occurrence 4005 if is_extract_all and non_single_occurrence: 4006 result = exp.Bracket(this=result, expressions=[exp.Slice(this=occurrence)]) 4007 # ARRAY_EXTRACT for REGEXP_EXTRACT with occurrence > 1 4008 elif non_single_occurrence: 4009 result = exp.Anonymous(this="ARRAY_EXTRACT", expressions=[result, occurrence]) 4010 4011 return self.sql(result) 4012 4013 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 4014 return self._regexp_extract_sql(expression) 4015 4016 def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str: 4017 return self._regexp_extract_sql(expression) 4018 4019 def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str: 4020 this = expression.this 4021 pattern = expression.expression 4022 position = expression.args.get("position") 4023 orig_occ = expression.args.get("occurrence") 4024 occurrence = orig_occ or exp.Literal.number(1) 4025 option = expression.args.get("option") 4026 parameters = expression.args.get("parameters") 4027 4028 validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims") 4029 if validated_flags: 4030 pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern]) 4031 4032 # Handle starting position offset 4033 pos_offset: exp.Expr = exp.Literal.number(0) 4034 if position and (not position.is_int or position.to_py() > 1): 4035 this = exp.Substring(this=this, start=position) 4036 pos_offset = position - exp.Literal.number(1) 4037 4038 # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x))) 4039 def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr: 4040 lst = exp.Bracket( 4041 this=exp.Anonymous(this=func_name, expressions=[this, pattern]), 4042 expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)], 4043 offset=1, 4044 ) 4045 transform = exp.Anonymous( 4046 this="LIST_TRANSFORM", 4047 expressions=[ 4048 lst, 4049 exp.Lambda( 4050 this=exp.Length(this=exp.to_identifier("x")), 4051 expressions=[exp.to_identifier("x")], 4052 ), 4053 ], 4054 ) 4055 return exp.Coalesce( 4056 this=exp.Anonymous(this="LIST_SUM", expressions=[transform]), 4057 expressions=[exp.Literal.number(0)], 4058 ) 4059 4060 # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset 4061 base_pos: exp.Expr = ( 4062 exp.Literal.number(1) 4063 + sum_lengths("STRING_SPLIT_REGEX", occurrence) 4064 + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1)) 4065 + pos_offset 4066 ) 4067 4068 # option=1: add match length for end position 4069 if option and option.is_int and option.to_py() == 1: 4070 match_at_occ = exp.Bracket( 4071 this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]), 4072 expressions=[occurrence], 4073 offset=1, 4074 ) 4075 base_pos = base_pos + exp.Coalesce( 4076 this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)] 4077 ) 4078 4079 # NULL checks for all provided arguments 4080 # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST 4081 null_args = [ 4082 expression.this, 4083 expression.expression, 4084 position, 4085 orig_occ, 4086 option, 4087 parameters, 4088 ] 4089 null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg] 4090 4091 matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]) 4092 4093 return self.sql( 4094 exp.case() 4095 
.when(exp.or_(*null_checks), exp.Null()) 4096 .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0)) 4097 .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0)) 4098 .else_(base_pos) 4099 ) 4100 4101 @unsupported_args("culture") 4102 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 4103 fmt = expression.args.get("format") 4104 if fmt and fmt.is_int: 4105 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 4106 4107 self.unsupported("Only integer formats are supported by NumberToStr") 4108 return self.function_fallback_sql(expression) 4109 4110 def autoincrementcolumnconstraint_sql(self, _) -> str: 4111 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 4112 return "" 4113 4114 def aliases_sql(self, expression: exp.Aliases) -> str: 4115 this = expression.this 4116 if isinstance(this, exp.Posexplode): 4117 return self.posexplode_sql(this) 4118 4119 return super().aliases_sql(expression) 4120 4121 def posexplode_sql(self, expression: exp.Posexplode) -> str: 4122 this = expression.this 4123 parent = expression.parent 4124 4125 # The default Spark aliases are "pos" and "col", unless specified otherwise 4126 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 4127 4128 if isinstance(parent, exp.Aliases): 4129 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 4130 pos, col = parent.expressions 4131 elif isinstance(parent, exp.Table): 4132 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 4133 alias = parent.args.get("alias") 4134 if alias: 4135 pos, col = alias.columns or [pos, col] 4136 alias.pop() 4137 4138 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 4139 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 4140 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 4141 gen_subscripts = self.sql( 4142 exp.Alias( 4143 this=exp.Anonymous( 4144 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 4145 ) 4146 - exp.Literal.number(1), 4147 alias=pos, 4148 ) 4149 ) 4150 4151 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 4152 4153 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 4154 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 4155 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 4156 4157 return posexplode_sql 4158 4159 def addmonths_sql(self, expression: exp.AddMonths) -> str: 4160 """ 4161 Handles three key issues: 4162 1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers 4163 2. End-of-month preservation: If input is last day of month, result is last day of result month 4164 3. 
Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP) 4165 """ 4166 from sqlglot.optimizer.annotate_types import annotate_types 4167 4168 this = expression.this 4169 if not this.type: 4170 this = annotate_types(this, dialect=self.dialect) 4171 4172 if this.is_type(*exp.DataType.TEXT_TYPES): 4173 this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP)) 4174 4175 # Detect float/decimal months to apply rounding (Snowflake behavior) 4176 # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS 4177 months_expr = expression.expression 4178 if not months_expr.type: 4179 months_expr = annotate_types(months_expr, dialect=self.dialect) 4180 4181 # Build interval or to_months expression based on type 4182 # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT)) 4183 interval_or_to_months = ( 4184 exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT")) 4185 if months_expr.is_type( 4186 exp.DType.FLOAT, 4187 exp.DType.DOUBLE, 4188 exp.DType.DECIMAL, 4189 ) 4190 # Integer case: standard INTERVAL N MONTH syntax 4191 else exp.Interval(this=months_expr, unit=exp.var("MONTH")) 4192 ) 4193 4194 date_add_expr = exp.Add(this=this, expression=interval_or_to_months) 4195 4196 # Apply end-of-month preservation if Snowflake flag is set 4197 # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END 4198 preserve_eom = expression.args.get("preserve_end_of_month") 4199 result_expr = ( 4200 exp.case() 4201 .when( 4202 exp.EQ(this=exp.func("LAST_DAY", this), expression=this), 4203 exp.func("LAST_DAY", date_add_expr), 4204 ) 4205 .else_(date_add_expr) 4206 if preserve_eom 4207 else date_add_expr 4208 ) 4209 4210 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 4211 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 4212 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 4213 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 4214 if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ): 4215 return self.sql(exp.Cast(this=result_expr, to=this.type)) 4216 return self.sql(result_expr) 4217 4218 def format_sql(self, expression: exp.Format) -> str: 4219 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 4220 return self.func("FORMAT", "'{}'", expression.expressions[0]) 4221 4222 return self.function_fallback_sql(expression) 4223 4224 def hexstring_sql( 4225 self, expression: exp.HexString, binary_function_repr: str | None = None 4226 ) -> str: 4227 # UNHEX('FF') correctly produces blob \xFF in DuckDB 4228 return super().hexstring_sql(expression, binary_function_repr="UNHEX") 4229 4230 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 4231 unit = expression.args.get("unit") 4232 date = expression.this 4233 4234 week_start = _week_unit_to_dow(unit) 4235 unit = unit_to_str(expression) 4236 4237 if week_start: 4238 result = self.sql( 4239 _build_week_trunc_expression(date, week_start, preserve_start_day=True) 4240 ) 4241 else: 4242 result = self.func("DATE_TRUNC", unit, date) 4243 4244 if ( 4245 expression.args.get("input_type_preserved") 4246 and date.is_type(*exp.DataType.TEMPORAL_TYPES) 4247 and not (is_date_unit(unit) and date.is_type(exp.DType.DATE)) 4248 ): 4249 return self.sql(exp.Cast(this=result, to=date.type)) 4250 4251 return result 4252 4253 def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str: 4254 unit = 
unit_to_str(expression) 4255 zone = expression.args.get("zone") 4256 timestamp = expression.this 4257 date_unit = is_date_unit(unit) 4258 4259 if date_unit and zone: 4260 # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC. 4261 # Double AT TIME ZONE needed for BigQuery compatibility: 4262 # 1. First AT TIME ZONE: ensures truncation happens in the target timezone 4263 # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component) 4264 timestamp = exp.AtTimeZone(this=timestamp, zone=zone) 4265 result_sql = self.func("DATE_TRUNC", unit, timestamp) 4266 return self.sql(exp.AtTimeZone(this=result_sql, zone=zone)) 4267 4268 result = self.func("DATE_TRUNC", unit, timestamp) 4269 if expression.args.get("input_type_preserved"): 4270 if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ): 4271 dummy_date = exp.Cast( 4272 this=exp.Literal.string("1970-01-01"), 4273 to=exp.DataType(this=exp.DType.DATE), 4274 ) 4275 date_time = exp.Add(this=dummy_date, expression=timestamp) 4276 result = self.func("DATE_TRUNC", unit, date_time) 4277 return self.sql(exp.Cast(this=result, to=timestamp.type)) 4278 4279 if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not ( 4280 date_unit and timestamp.is_type(exp.DType.DATE) 4281 ): 4282 return self.sql(exp.Cast(this=result, to=timestamp.type)) 4283 4284 return result 4285 4286 def trim_sql(self, expression: exp.Trim) -> str: 4287 expression.this.replace(_cast_to_varchar(expression.this)) 4288 if expression.expression: 4289 expression.expression.replace(_cast_to_varchar(expression.expression)) 4290 4291 result_sql = super().trim_sql(expression) 4292 return _gen_with_cast_to_blob(self, expression, result_sql) 4293 4294 def round_sql(self, expression: exp.Round) -> str: 4295 this = expression.this 4296 decimals = expression.args.get("decimals") 4297 truncate = expression.args.get("truncate") 4298 4299 # DuckDB requires the scale (decimals) argument to be an INT 4300 # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally 4301 if decimals is not None and expression.args.get("casts_non_integer_decimals"): 4302 if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)): 4303 decimals = exp.cast(decimals, exp.DType.INT) 4304 4305 func = "ROUND" 4306 if truncate: 4307 # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN 4308 if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"): 4309 func = "ROUND_EVEN" 4310 truncate = None 4311 # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO 4312 elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"): 4313 truncate = None 4314 4315 return self.func(func, this, decimals, truncate) 4316 4317 def strtok_sql(self, expression: exp.Strtok) -> str: 4318 string_arg = expression.this 4319 delimiter_arg = expression.args.get("delimiter") 4320 part_index_arg = expression.args.get("part_index") 4321 4322 if delimiter_arg and part_index_arg: 4323 # Escape regex chars and build character class at runtime using REGEXP_REPLACE 4324 escaped_delimiter = exp.Anonymous( 4325 this="REGEXP_REPLACE", 4326 expressions=[ 4327 delimiter_arg, 4328 exp.Literal.string( 4329 r"([\[\]^.\-*+?(){}|$\\])" 4330 ), # Escape problematic regex chars 4331 exp.Literal.string( 4332 r"\\\1" 4333 ), # Replace with escaped version using $1 backreference 4334 exp.Literal.string("g"), # Global flag 4335 ], 4336 ) 4337 # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', 
escaped_delimiter, ']') END 4338 regex_pattern = ( 4339 exp.case() 4340 .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string("")) 4341 .else_( 4342 exp.func( 4343 "CONCAT", 4344 exp.Literal.string("["), 4345 escaped_delimiter, 4346 exp.Literal.string("]"), 4347 ) 4348 ) 4349 ) 4350 4351 # STRTOK skips empty strings, so we need to filter them out 4352 # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index] 4353 split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern) 4354 x = exp.to_identifier("x") 4355 is_empty = x.eq(exp.Literal.string("")) 4356 filtered_array = exp.func( 4357 "LIST_FILTER", 4358 split_array, 4359 exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]), 4360 ) 4361 base_func = exp.Bracket( 4362 this=filtered_array, 4363 expressions=[part_index_arg], 4364 offset=1, 4365 ) 4366 4367 # Use template with the built regex pattern 4368 result = exp.replace_placeholders( 4369 self.STRTOK_TEMPLATE.copy(), 4370 string=string_arg, 4371 delimiter=delimiter_arg, 4372 part_index=part_index_arg, 4373 base_func=base_func, 4374 ) 4375 4376 return self.sql(result) 4377 4378 return self.function_fallback_sql(expression) 4379 4380 def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str: 4381 string_arg = expression.this 4382 delimiter_arg = expression.args.get("expression") or exp.Literal.string(" ") 4383 4384 escaped = exp.RegexpReplace( 4385 this=delimiter_arg.copy(), 4386 expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"), 4387 replacement=exp.Literal.string(r"\\\1"), 4388 modifiers=exp.Literal.string("g"), 4389 ) 4390 return self.sql( 4391 exp.replace_placeholders( 4392 self.STRTOK_TO_ARRAY_TEMPLATE.copy(), 4393 string=string_arg, 4394 delimiter=delimiter_arg, 4395 escaped=escaped, 4396 ) 4397 ) 4398 4399 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 4400 result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile")) 4401 4402 # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type 4403 if expression.is_type(*exp.DataType.REAL_TYPES): 4404 result = f"CAST({result} AS DOUBLE)" 4405 4406 return result 4407 4408 def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str: 4409 """ 4410 BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values 4411 dividing the input distribution into n equal-sized buckets. 4412 4413 Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery 4414 does not document the specific algorithm used so results may differ. DuckDB does not 4415 support RESPECT NULLS. 
4416 """ 4417 this = expression.this 4418 if isinstance(this, exp.Distinct): 4419 # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both 4420 if len(this.expressions) < 2: 4421 self.unsupported("APPROX_QUANTILES requires a bucket count argument") 4422 return self.function_fallback_sql(expression) 4423 num_quantiles_expr = this.expressions[1].pop() 4424 else: 4425 num_quantiles_expr = expression.expression 4426 4427 if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int: 4428 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 4429 return self.function_fallback_sql(expression) 4430 4431 num_quantiles = t.cast(int, num_quantiles_expr.to_py()) 4432 if num_quantiles <= 0: 4433 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 4434 return self.function_fallback_sql(expression) 4435 4436 quantiles = [ 4437 exp.Literal.number(Decimal(i) / Decimal(num_quantiles)) 4438 for i in range(num_quantiles + 1) 4439 ] 4440 4441 return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))) 4442 4443 def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str: 4444 if expression.args.get("scalar_only"): 4445 expression = exp.JSONExtractScalar( 4446 this=rename_func("JSON_VALUE")(self, expression), expression="'$'" 4447 ) 4448 return _arrow_json_extract_sql(self, expression) 4449 4450 def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str: 4451 this = expression.this 4452 4453 if _is_binary(this): 4454 expression.type = exp.DType.BINARY.into_expr() 4455 4456 arg = _cast_to_bit(this) 4457 4458 if isinstance(this, exp.Neg): 4459 arg = exp.Paren(this=arg) 4460 4461 expression.set("this", arg) 4462 4463 result_sql = f"~{self.sql(expression, 'this')}" 4464 4465 return _gen_with_cast_to_blob(self, expression, result_sql) 4466 4467 def window_sql(self, expression: exp.Window) -> str: 4468 this = expression.this 4469 if isinstance(this, exp.Corr) or ( 4470 isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr) 4471 ): 4472 return self._corr_sql(expression) 4473 4474 return super().window_sql(expression) 4475 4476 def filter_sql(self, expression: exp.Filter) -> str: 4477 if isinstance(expression.this, exp.Corr): 4478 return self._corr_sql(expression) 4479 4480 return super().filter_sql(expression) 4481 4482 def _corr_sql( 4483 self, 4484 expression: exp.Filter | exp.Window | exp.Corr, 4485 ) -> str: 4486 if isinstance(expression, exp.Corr) and not expression.args.get("null_on_zero_variance"): 4487 return self.func("CORR", expression.this, expression.expression) 4488 4489 corr_expr = _maybe_corr_null_to_false(expression) 4490 if corr_expr is None: 4491 if isinstance(expression, exp.Window): 4492 return super().window_sql(expression) 4493 if isinstance(expression, exp.Filter): 4494 return super().filter_sql(expression) 4495 corr_expr = expression # make mypy happy 4496 4497 return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr)) 4498 4499 def uuid_sql(self, expression: exp.Uuid) -> str: 4500 namespace = expression.this 4501 name = expression.args.get("name") 4502 4503 # UUID v5 (namespace + name) - Emulate using SHA1 4504 if namespace and name: 4505 result = exp.replace_placeholders( 4506 self.UUID_V5_TEMPLATE.copy(), 4507 namespace=namespace, 4508 name=name, 4509 ) 4510 return self.sql(result) 4511 4512 return super().uuid_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
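For instance, these options can be passed straight through sqlglot.transpile, which forwards extra keyword arguments to the generator. A minimal sketch; the exact pretty-printed layout may vary between sqlglot versions:

import sqlglot

# pretty and identify are generator options forwarded by sqlglot.transpile
print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 1",
        write="duckdb",
        pretty=True,
        identify=True,
    )[0]
)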
def timeslice_sql(self, expression: exp.TimeSlice) -> str:
    """
    Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.

    Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
    DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr)

    For 'END' kind, add the interval to get the end of the slice.
    For DATE type with 'END', cast result back to DATE to preserve type.
    """
    date_expr = expression.this
    slice_length = expression.expression
    unit = expression.unit
    kind = expression.text("kind").upper()

    # Create INTERVAL expression: INTERVAL 'N' UNIT
    interval_expr = exp.Interval(this=slice_length, unit=unit)

    # Create base time_bucket expression
    time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)

    # Check if we need the end of the slice (default is start)
    if kind != "END":
        # For 'START', return time_bucket directly
        return self.sql(time_bucket_expr)

    # For 'END', add the interval to get end of slice
    add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())

    # If input is DATE type, cast result back to DATE to preserve type
    # DuckDB converts DATE to TIMESTAMP when adding intervals
    if date_expr.is_type(exp.DType.DATE):
        return self.sql(exp.cast(add_expr, exp.DType.DATE))

    return self.sql(add_expr)
Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.
Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr)
For 'END' kind, add the interval to get the end of the slice. For DATE type with 'END', cast result back to DATE to preserve type.
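A minimal sketch of exercising this rewrite through sqlglot.transpile; the column and table names are hypothetical, and the exact output text can differ between sqlglot versions:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT TIME_SLICE(created_at, 15, 'MINUTE', 'END') FROM events",
        read="snowflake",
        write="duckdb",
    )[0]
)
# Expected shape: TIME_BUCKET(INTERVAL '15' MINUTE, created_at) + INTERVAL '15' MINUTE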
def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
    """
    Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

    Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
    - Each bucket covers 32,768 values
    - Bucket numbering starts at 1
    - Formula: ((value - 1) // 32768) + 1 for positive values

    For non-positive values (0 and negative), we use value // 32768 to avoid
    producing bucket 0 or positive bucket IDs for negative inputs.
    """
    value = expression.this

    positive_formula = ((value - 1) // 32768) + 1
    non_positive_formula = value // 32768

    # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
    case_expr = (
        exp.case()
        .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
        .else_(non_positive_formula)
    )
    return self.sql(case_expr)
Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.
Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
- Each bucket covers 32,768 values
- Bucket numbering starts at 1
- Formula: ((value - 1) // 32768) + 1 for positive values
For non-positive values (0 and negative), we use value // 32768 to avoid producing bucket 0 or positive bucket IDs for negative inputs.
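A pure-Python mirror of the generated CASE expression makes the bucket boundaries concrete. Python's // floors while a SQL engine's integer division may truncate toward zero for negatives, so the assertions stick to the positive branch:

def bitmap_bucket_number(value: int) -> int:
    # Mirrors: CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
    return ((value - 1) // 32768) + 1 if value > 0 else value // 32768

assert bitmap_bucket_number(1) == 1      # first value of bucket 1
assert bitmap_bucket_number(32768) == 1  # last value of bucket 1
assert bitmap_bucket_number(32769) == 2  # first value of bucket 2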
def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
    """
    Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

    Snowflake's BITMAP_BIT_POSITION behavior:
    - For n <= 0: returns ABS(n) % 32768
    - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
    """
    this = expression.this

    return self.sql(
        exp.Mod(
            this=exp.Paren(
                this=exp.If(
                    this=exp.GT(this=this, expression=exp.Literal.number(0)),
                    true=this - exp.Literal.number(1),
                    false=exp.Abs(this=this),
                )
            ),
            expression=MAX_BIT_POSITION,
        )
    )
Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
Snowflake's BITMAP_BIT_POSITION behavior:
- For n <= 0: returns ABS(n) % 32768
- For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
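The same behavior, mirrored in plain Python to show how positions wrap at the bucket boundary:

def bitmap_bit_position(n: int) -> int:
    # Mirrors: CASE WHEN n > 0 THEN (n - 1) % 32768 ELSE ABS(n) % 32768 END
    return (n - 1) % 32768 if n > 0 else abs(n) % 32768

assert bitmap_bit_position(1) == 0
assert bitmap_bit_position(32768) == 32767  # maximum position within a bucket
assert bitmap_bit_position(32769) == 0      # wraps into the next bucket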
def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
    """
    Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    Snowflake bitmap format:
    - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
    - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
    """
    arg = expression.this
    return (
        f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
    )
Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent. Uses a pre-parsed template with placeholders replaced by expression nodes.
Snowflake bitmap format:
- Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
- Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
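A sketch of the typical Snowflake distinct-counting pattern this supports; table and column names are hypothetical, and since the DuckDB output is a lengthy blob-building expression expanded from the template, it is printed rather than asserted:

import sqlglot

sql = """
SELECT BITMAP_BUCKET_NUMBER(id) AS bucket,
       BITMAP_CONSTRUCT_AGG(BITMAP_BIT_POSITION(id)) AS bmp
FROM t
GROUP BY 1
"""
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])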
def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
    this = expression.this
    expr = expression.expression

    if expression.args.get("case_insensitive"):
        this = exp.Upper(this=this)
        expr = exp.Upper(this=expr)

    result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)

    if expression.args.get("integer_scale"):
        result = exp.cast(result * 100, "INTEGER")

    return self.sql(result)
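A sketch of the resulting rewrite, assuming the Snowflake reader sets the case_insensitive and integer_scale flags (Snowflake's function is case-insensitive and returns a 0-100 score, whereas DuckDB's returns 0-1):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT JAROWINKLER_SIMILARITY('DuckDB', 'duckdb')",
        read="snowflake",
        write="duckdb",
    )[0]
)
# Expected shape:
# CAST(JARO_WINKLER_SIMILARITY(UPPER('DuckDB'), UPPER('duckdb')) * 100 AS INTEGER)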
def randstr_sql(self, expression: exp.Randstr) -> str:
    """
    Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    RANDSTR(length, generator) generates a random string of specified length.
    - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
    - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
    - No generator: Use default seed value
    """
    length = expression.this
    generator = expression.args.get("generator")

    if generator:
        if isinstance(generator, exp.Rand):
            # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
            seed_value = generator.this or generator
        else:
            # Const/int or other expression - use as seed directly
            seed_value = generator
    else:
        # No generator specified, use default seed (arbitrary but deterministic)
        seed_value = exp.Literal.number(RANDSTR_SEED)

    replacements = {"seed": seed_value, "length": length}
    return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random. Uses a pre-parsed template with placeholders replaced by expression nodes.
RANDSTR(length, generator) generates a random string of specified length.
- With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
- With RANDOM(): Use RANDOM() in the hash for non-deterministic output
- No generator: Use default seed value
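A minimal sketch; the output is a subquery expanded from RANDSTR_TEMPLATE, so only its general shape is noted:

import sqlglot

# A seeded call is deterministic: the same seed always yields the same string.
print(sqlglot.transpile("SELECT RANDSTR(10, 1234)", read="snowflake", write="duckdb")[0])
# The result hashes (i + 1234) for each of the 10 positions and indexes into
# a fixed character pool.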
@unsupported_args("finish")
def reduce_sql(self, expression: exp.Reduce) -> str:
    array_arg = expression.this
    initial_value = expression.args.get("initial")
    merge_lambda = expression.args.get("merge")

    if merge_lambda:
        merge_lambda.set("colon", True)

    return self.func("list_reduce", array_arg, merge_lambda, initial_value)
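A sketch of the resulting argument order, assuming the Snowflake reader parses REDUCE into exp.Reduce; note that DuckDB's list_reduce takes the lambda second and the initial value last, and the "colon" flag above switches the lambda to DuckDB's LAMBDA ... : ... form:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT REDUCE([1, 2, 3], 0, (acc, x) -> acc + x)",
        read="snowflake",
        write="duckdb",
    )[0]
)
# Expected shape: LIST_REDUCE([1, 2, 3], LAMBDA acc, x : acc + x, 0)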
def zipf_sql(self, expression: exp.Zipf) -> str:
    """
    Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
    Uses a pre-parsed template with placeholders replaced by expression nodes.
    """
    s = expression.this
    n = expression.args["elementcount"]
    gen = expression.args["gen"]

    if not isinstance(gen, exp.Rand):
        # (ABS(HASH(seed)) % 1000000) / 1000000.0
        random_expr: exp.Expr = exp.Div(
            this=exp.Paren(
                this=exp.Mod(
                    this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
                    expression=exp.Literal.number(1000000),
                )
            ),
            expression=exp.Literal.number(1000000.0),
        )
    else:
        # Use RANDOM() for non-deterministic output
        random_expr = exp.Rand()

    replacements = {"s": s, "n": n, "random_expr": random_expr}
    return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling. Uses a pre-parsed template with placeholders replaced by expression nodes.
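The deterministic branch's pseudo-random value can be mirrored in Python. Python's built-in hash stands in for DuckDB's HASH here, so the actual numbers differ; only the [0, 1) shape is illustrated:

def pseudo_random(seed: int) -> float:
    # Mirrors (ABS(HASH(seed)) % 1000000) / 1000000.0 from the deterministic branch
    return (abs(hash(seed)) % 1000000) / 1000000.0

value = pseudo_random(42)
assert 0.0 <= value < 1.0  # the template then inverts this through the Zipf CDF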
def tobinary_sql(self, expression: exp.ToBinary) -> str:
    """
    TO_BINARY and TRY_TO_BINARY transpilation:
    - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
    - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
    - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')

    For TRY_TO_BINARY (safe=True), wrap with TRY():
    - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
    """
    value = expression.this
    format_arg = expression.args.get("format")
    is_safe = expression.args.get("safe")
    is_binary = _is_binary(expression)

    if not format_arg and not is_binary:
        func_name = "TRY_TO_BINARY" if is_safe else "TO_BINARY"
        return self.func(func_name, value)

    # Snowflake defaults to HEX encoding when no format is specified
    fmt = format_arg.name.upper() if format_arg else "HEX"

    if fmt in ("UTF-8", "UTF8"):
        # DuckDB ENCODE always uses UTF-8, no charset parameter needed
        result = self.func("ENCODE", value)
    elif fmt == "BASE64":
        result = self.func("FROM_BASE64", value)
    elif fmt == "HEX":
        result = self.func("UNHEX", value)
    else:
        if is_safe:
            return self.sql(exp.null())
        else:
            self.unsupported(f"format {fmt} is not supported")
            result = self.func("TO_BINARY", value)
    return f"TRY({result})" if is_safe else result
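    # Hedged round-trip through the public API; the output string mirrors the
    # mappings documented above, though the exact formatting may differ slightly:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile(
    #   ...     "SELECT TO_BINARY('48454C50', 'HEX')", read="snowflake", write="duckdb"
    #   ... )[0]
    #   "SELECT UNHEX('48454C50')"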
    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        fmt = expression.args.get("format")
        precision = expression.args.get("precision")
        scale = expression.args.get("scale")

        if not fmt and precision and scale:
            return self.sql(
                exp.cast(
                    expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb"
                )
            )

        return super().tonumber_sql(expression)
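    # For example, TO_NUMBER(x, 10, 2) with no format string becomes
    # CAST(x AS DECIMAL(10, 2)); every other shape falls through to the base generator.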
    def generator_sql(self, expression: exp.Generator) -> str:
        # Transpile Snowflake GENERATOR to DuckDB range()
        rowcount = expression.args.get("rowcount")
        time_limit = expression.args.get("time_limit")

        if time_limit:
            self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")

        if not rowcount:
            self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
            return self.func("range", exp.Literal.number(0))

        return self.func("range", rowcount)
    def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
        if expression.args.get("colon"):
            prefix = "LAMBDA "
            arrow_sep = ":"
            wrap = False
        else:
            prefix = ""

        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
        return f"{prefix}{lambda_sql}"
    def sortarray_sql(self, expression: exp.SortArray) -> str:
        arr = expression.this
        asc = expression.args.get("asc")
        nulls_first = expression.args.get("nulls_first")

        if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first)

        nulls_are_first = nulls_first == exp.true()
        nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

        if not isinstance(asc, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first_sql)

        descending = asc == exp.false()

        if not descending and not nulls_are_first:
            return self.func("LIST_SORT", arr)
        if not nulls_are_first:
            return self.func("ARRAY_REVERSE_SORT", arr)

        return self.func(
            "LIST_SORT",
            arr,
            exp.Literal.string("DESC" if descending else "ASC"),
            exp.Literal.string("NULLS FIRST"),
        )
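    # Net effect of the branches above when both flags are literal booleans:
    #   ascending,  nulls last  -> LIST_SORT(arr)
    #   descending, nulls last  -> ARRAY_REVERSE_SORT(arr)
    #   any order,  nulls first -> LIST_SORT(arr, 'ASC' | 'DESC', 'NULLS FIRST')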
    def install_sql(self, expression: exp.Install) -> str:
        force = "FORCE " if expression.args.get("force") else ""
        this = self.sql(expression, "this")
        from_clause = expression.args.get("from_")
        from_clause = f" FROM {from_clause}" if from_clause else ""
        return f"{force}INSTALL {this}{from_clause}"
    def strposition_sql(self, expression: exp.StrPosition) -> str:
        this = expression.this
        substr = expression.args.get("substr")
        position = expression.args.get("position")

        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types.
        # Convert to HEX strings, use STRPOS, then convert the hex position to a byte position
        if _is_binary(this):
            # Build expression: STRPOS(HEX(haystack), HEX(needle))
            hex_strpos = exp.StrPosition(
                this=exp.Hex(this=this),
                substr=exp.Hex(this=substr),
            )

            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

        # For VARCHAR: handle clamp_position
        if expression.args.get("clamp_position") and position:
            expression = expression.copy()
            expression.set(
                "position",
                exp.If(
                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                    true=exp.Literal.number(1),
                    false=position.copy(),
                ),
            )

        return strposition_sql(self, expression)
    def substring_sql(self, expression: exp.Substring) -> str:
        if expression.args.get("zero_start"):
            if start := expression.args.get("start"):
                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
            if length := expression.args.get("length"):
                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)

            return self.func("SUBSTRING", expression.this, start, length)

        return self.function_fallback_sql(expression)
    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        # Check if target_type requires TIMESTAMPTZ (for the LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DType.TIMESTAMPLTZ,
            exp.DType.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
                )
            )
        return base_sql
    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        formatted_time = self.format_time(expression)
        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
        return self.sql(
            exp.cast(
                self.func(function_name, expression.this, formatted_time),
                exp.DataType(this=exp.DType.DATE),
            )
        )
    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
        this = expression.this
        time_format = self.format_time(expression)
        safe = expression.args.get("safe")
        time_type = exp.DataType.from_str("TIME", dialect="duckdb")
        cast_expr = exp.TryCast if safe else exp.Cast

        if time_format:
            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
            return self.sql(cast_expr(this=strptime, to=time_type))

        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
            return self.sql(this)

        return self.sql(cast_expr(this=this, to=time_type))
    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        if not expression.this:
            return "CURRENT_DATE"

        expr = exp.Cast(
            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
            to=exp.DataType(this=exp.DType.DATE),
        )
        return self.sql(expr)
    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(
                exp.case()
                .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
                .else_(exp.null())
            )
        return self.func("JSON", arg)
    def unicode_sql(self, expression: exp.Unicode) -> str:
        if expression.args.get("empty_is_zero"):
            return self.sql(
                exp.case()
                .when(expression.this.eq(exp.Literal.string("")), exp.Literal.number(0))
                .else_(exp.Anonymous(this="UNICODE", expressions=[expression.this]))
            )

        return self.func("UNICODE", expression.this)
    def trunc_sql(self, expression: exp.Trunc) -> str:
        decimals = expression.args.get("decimals")
        if (
            expression.args.get("fractions_supported")
            and decimals
            and not decimals.is_type(exp.DType.INT)
        ):
            decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")

        return self.func("TRUNC", expression.this, decimals)
    def normal_sql(self, expression: exp.Normal) -> str:
        """
        Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

        Uses the Box-Muller transform via NORMAL_TEMPLATE.
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expr = expression.args["gen"]

        # Build two uniform random values [0, 1) for the Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            u1: exp.Expr = exp.Rand()
            u2: exp.Expr = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
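    # The Box-Muller transform turns two independent uniforms u1, u2 in (0, 1) into a
    # standard normal draw (a standard identity, shown as a sketch; NORMAL_TEMPLATE is
    # assumed to encode something equivalent):
    #
    #   z = sqrt(-2 * ln(u1)) * cos(2 * pi * u2)
    #   sample = mean + stddev * z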
    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        gen = expression.args.get("gen")

        # Determine whether the result should be an integer (both bounds are integers).
        # This emulates Snowflake's behavior: INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expr = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expr = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        if is_int_result:
            result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

        return self.sql(result)
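    # Worked example of the expression built above: UNIFORM(1, 10, RANDOM()) has
    # integer bounds, so the generated shape is
    #   CAST(FLOOR(1 + RANDOM() * (10 - 1 + 1)) AS BIGINT)
    # which lands uniformly in [1, 10]; float bounds skip the + 1 and the final cast.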
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if the values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME
        if nano:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies the server timezone while DuckDB uses the local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function.
            # So, for NANOSECOND with TIME, fall back to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.from_str(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.from_str(cast_type, dialect="duckdb"),
                )
            )

        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns a float, so cast to BIGINT for an integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.from_str("BIGINT", dialect="duckdb"))
            return self.sql(result)

        return super().extract_sql(expression)
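    # For instance, assuming EXTRACT_EPOCH_MAPPINGS sends EPOCH_SECOND to DuckDB's
    # EPOCH function, EXTRACT(EPOCH_SECOND FROM ts) comes out as
    # CAST(EPOCH(ts) AS BIGINT), since EPOCH alone returns a float.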
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle it gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
    @unsupported_args("nano")
    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
        if nano := expression.args.get("nano"):
            nano.pop()

        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
        return f"CAST({timestamp} AS TIMESTAMPTZ)"
    @unsupported_args("nano")
    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
        # Extract the zone before popping
        zone = expression.args.get("zone")
        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
        if zone:
            zone = zone.pop()

        if nano := expression.args.get("nano"):
            nano.pop()

        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)

        if zone:
            # Use AT TIME ZONE to apply the explicit timezone
            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"

        return timestamp
    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: str | None = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
    def join_sql(self, expression: exp.Join) -> str:
        if (
            not expression.args.get("using")
            and not expression.args.get("on")
            and not expression.method
            and (expression.kind in ("", "INNER", "OUTER"))
        ):
            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause.
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            if isinstance(expression.this, exp.Unnest):
                return super().join_sql(expression.on(exp.true()))

            expression.set("side", None)
            expression.set("kind", None)

        return super().join_sql(expression)
    def bracket_sql(self, expression: exp.Bracket) -> str:
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                bracket = f"({bracket})[1]"

        return bracket
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP.
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for the FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order:
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add a FILTER clause if needed.
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use the existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)
    def collate_sql(self, expression: exp.Collate) -> str:
        if not expression.expression.is_string:
            return super().collate_sql(expression)

        raw = expression.expression.name
        if not raw:
            return self.sql(expression.this)

        parts = []
        for part in raw.split("-"):
            lower = part.lower()
            if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
                if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
                    self.unsupported(
                        f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
                    )
                parts.append(lower)

        if not parts:
            return self.sql(expression.this)

        return super().collate_sql(
            exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
        )
    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        parameters = expression.args.get("parameters")

        # Validate flags - only "ims" flags are supported for embedded patterns
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")

        if position:
            this = exp.Substring(this=this, start=position)

        # Embed flags in the pattern (REGEXP_EXTRACT_ALL doesn't support a flags argument)
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle an empty pattern: Snowflake returns 0, DuckDB would match between every character
        result = (
            exp.case()
            .when(
                exp.EQ(this=pattern, expression=exp.Literal.string("")),
                exp.Literal.number(0),
            )
            .else_(
                exp.Length(
                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
                )
            )
        )

        return self.sql(result)
    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
        subject = expression.this
        pattern = expression.expression
        replacement = expression.args.get("replacement") or exp.Literal.string("")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        modifiers = expression.args.get("modifiers")

        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""

        # Handle occurrence (only literals supported)
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # Tell DuckDB to replace either all occurrences or none; the
            # single_replace check keeps DuckDB -> DuckDB round trips intact
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            pos = position.to_py()
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)
    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
        this = expression.this
        pattern = expression.expression
        flag = expression.args.get("flag")

        if expression.args.get("full_match"):
            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
            flag = exp.Literal.string(validated_flags) if validated_flags else None
            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)

        return self.func("REGEXP_MATCHES", this, pattern, flag)
    @unsupported_args("ins_cost", "del_cost", "sub_cost")
    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
        this = expression.this
        expr = expression.expression
        max_dist = expression.args.get("max_dist")

        if max_dist is None:
            return self.func("LEVENSHTEIN", this, expr)

        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
        levenshtein = exp.Levenshtein(this=this, expression=expr)
        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
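    # e.g. with max_dist = 3 the output becomes LEAST(LEVENSHTEIN(a, b), 3), which
    # clamps the reported distance at max_dist just like Snowflake does.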
    def pad_sql(self, expression: exp.Pad) -> str:
        """
        Handle RPAD/LPAD for VARCHAR and BINARY types.

        For VARCHAR: Delegate to the parent class
        For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))
        """
        string_arg = expression.this
        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")

        if _is_binary(string_arg) or _is_binary(fill_arg):
            length_arg = expression.expression
            is_left = expression.args.get("is_left")

            input_len = exp.ByteLength(this=string_arg)
            chars_needed = length_arg - input_len
            pad_count = exp.Greatest(
                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
            )
            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)

            left, right = string_arg, repeat_expr
            if is_left:
                left, right = right, left

            result = exp.DPipe(this=left, expression=right)
            return self.sql(result)

        # For VARCHAR: Delegate to the parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
        return super().pad_sql(expression)
    def minhash_sql(self, expression: exp.Minhash) -> str:
        k = expression.this
        exprs = expression.expressions

        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
            self.unsupported(
                "MINHASH with multiple expressions or * requires manual query restructuring"
            )
            return self.func("MINHASH", k, *exprs)

        expr = exprs[0]
        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
        return f"({self.sql(result)})"
    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
        arr = expression.this
        func = self.func("LIST_DISTINCT", arr)

        if expression.args.get("check_null"):
            add_null_to_array = exp.func(
                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
            )
            return self.sql(
                exp.If(
                    this=exp.NEQ(
                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
                    ),
                    true=add_null_to_array,
                    false=func,
                )
            )

        return func
    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
            return self._array_bag_sql(
                self.ARRAY_INTERSECTION_CONDITION,
                expression.expressions[0],
                expression.expressions[1],
            )
        return self.function_fallback_sql(expression)
    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
        arr1, arr2 = expression.this, expression.expression
        if expression.args.get("is_multiset"):
            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
        return self.sql(
            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
        )
    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
        """
        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
        expressions that adjust the index at query time:
        - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
        - end: CASE WHEN end < 0 THEN end - 1 ELSE end END
        """
        start, end = expression.args.get("start"), expression.args.get("end")

        if expression.args.get("zero_based"):
            if start is not None:
                start = (
                    exp.case()
                    .when(
                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(start)
                )
            if end is not None:
                end = (
                    exp.case()
                    .when(
                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(end)
                )

        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))
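    # Worked example of the index adjustment: for Snowflake's ARRAY_SLICE(arr, 0, 2)
    # (elements 0 and 1), the generated CASE expressions evaluate to start = 1 and
    # end = 2, so DuckDB's 1-indexed, inclusive slice returns the same two elements.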
    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
        args = expression.expressions

        if not args:
            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))

        # Build placeholder values for the template
        lengths = [exp.Length(this=arg) for arg in args]
        max_len = (
            lengths[0]
            if len(lengths) == 1
            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
        )

        # Empty struct with the same schema: {'$1': NULL, '$2': NULL, ...}
        empty_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
                for i in range(len(args))
            ],
        )

        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
        # COALESCE wrapping handles NULL arrays - it prevents invalid NULL[i] syntax
        index = exp.column("__i") + 1
        transform_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(
                    this=exp.Literal.string(f"${i + 1}"),
                    expression=exp.func("COALESCE", arg, exp.array())[index],
                )
                for i, arg in enumerate(args)
            ],
        )

        result = exp.replace_placeholders(
            self.ARRAYS_ZIP_TEMPLATE.copy(),
            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
            all_empty_check=exp.and_(
                *[
                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
                    for arg in args
                ]
            ),
            empty_struct=empty_struct,
            max_len=max_len,
            transform_struct=transform_struct,
        )
        return self.sql(result)
    def stuff_sql(self, expression: exp.Stuff) -> str:
        base = expression.this
        start = expression.args["start"]
        length = expression.args["length"]
        insertion = expression.expression
        is_binary = _is_binary(base)

        if is_binary:
            # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
            # (each byte = 2 hex chars), then UNHEX back to BLOB
            base = exp.Hex(this=base)
            insertion = exp.Hex(this=insertion)
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
            )
            right = exp.Substring(
                this=base.copy(),
                start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
                + exp.Literal.number(1),
            )
        else:
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=start.copy() - exp.Literal.number(1),
            )
            right = exp.Substring(this=base.copy(), start=start + length)

        result: exp.Expr = exp.DPipe(
            this=exp.DPipe(this=left, expression=insertion), expression=right
        )

        if is_binary:
            result = exp.Unhex(this=result)

        return self.sql(result)
    def rand_sql(self, expression: exp.Rand) -> str:
        seed = expression.this
        if seed is not None:
            self.unsupported("RANDOM with seed is not supported in DuckDB")

        lower = expression.args.get("lower")
        upper = expression.args.get("upper")

        if lower and upper:
            # Scale DuckDB's [0, 1) to the specified range
            range_size = exp.paren(upper - lower)
            scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)

            # For now we assume that if bounds are set, the return type is BIGINT,
            # matching Snowflake/Teradata behavior
            result = exp.cast(scaled, exp.DType.BIGINT)
            return self.sql(result)

        # Default DuckDB behavior - just return RANDOM() as a float
        return "RANDOM()"
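    # e.g. bounds (0, 10) yield CAST(0 + RANDOM() * (10 - 0) AS BIGINT), scaling
    # DuckDB's [0, 1) draw into the requested range before the integer cast.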
    def bytelength_sql(self, expression: exp.ByteLength) -> str:
        arg = expression.this

        # Check if it's a text type (handles both literals and annotated expressions)
        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("OCTET_LENGTH", exp.Encode(this=arg))

        # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated)
        return self.func("OCTET_LENGTH", arg)
    def base64encode_sql(self, expression: exp.Base64Encode) -> str:
        # DuckDB TO_BASE64 requires BLOB input.
        # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
        # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
        result = expression.this

        # Check if the input is a string type - ENCODE only accepts VARCHAR
        if result.is_type(*exp.DataType.TEXT_TYPES):
            result = exp.Encode(this=result)

        result = exp.ToBase64(this=result)

        max_line_length = expression.args.get("max_line_length")
        alphabet = expression.args.get("alphabet")

        # Handle a custom alphabet by replacing standard chars with custom ones
        result = _apply_base64_alphabet_replacements(result, alphabet)

        # Handle max_line_length by inserting newlines every N characters
        line_length = (
            t.cast(int, max_line_length.to_py())
            if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
            else 0
        )
        if line_length > 0:
            newline = exp.Chr(expressions=[exp.Literal.number(10)])
            result = exp.Trim(
                this=exp.RegexpReplace(
                    this=result,
                    expression=exp.Literal.string(f"(.{{{line_length}}})"),
                    replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
                ),
                expression=newline,
                position="TRAILING",
            )

        return self.sql(result)
    def hex_sql(self, expression: exp.Hex) -> str:
        case = expression.args.get("case")

        if not case:
            return self.func("HEX", expression.this)

        hex_expr = exp.Hex(this=expression.this)
        return self.sql(
            exp.case()
            .when(case.is_(exp.null()), exp.null())
            .when(case.copy().eq(0), exp.Lower(this=hex_expr.copy()))
            .else_(hex_expr)
        )
    def replace_sql(self, expression: exp.Replace) -> str:
        result_sql = self.func(
            "REPLACE",
            _cast_to_varchar(expression.this),
            _cast_to_varchar(expression.expression),
            _cast_to_varchar(expression.args.get("replacement")),
        )
        return _gen_with_cast_to_blob(self, expression, result_sql)
    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expr) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value)
        # from Snowflake, we generate STRUCT_PACK to build it, since STRUCT_INSERT({}, key := value)
        # is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)
    def mapdelete_sql(self, expression: exp.MapDelete) -> str:
        map_arg = expression.this
        keys_to_delete = expression.expressions

        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))

        lambda_expr = exp.Lambda(
            this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
            expressions=[exp.to_identifier("x")],
        )
        result = exp.func(
            "MAP_FROM_ENTRIES",
            exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
        )
        return self.sql(result)
    def mappick_sql(self, expression: exp.MapPick) -> str:
        map_arg = expression.this
        keys_to_pick = expression.expressions

        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))

        if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
            lambda_expr = exp.Lambda(
                this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
                expressions=[exp.to_identifier("x")],
            )
        else:
            lambda_expr = exp.Lambda(
                this=exp.In(this=x_dot_key, expressions=keys_to_pick),
                expressions=[exp.to_identifier("x")],
            )

        result = exp.func(
            "MAP_FROM_ENTRIES",
            exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
        )
        return self.sql(result)
    @unsupported_args("update_flag")
    def mapinsert_sql(self, expression: exp.MapInsert) -> str:
        map_arg = expression.this
        key = expression.args.get("key")
        value = expression.args.get("value")

        map_type = map_arg.type

        if value is not None:
            if map_type and map_type.expressions and len(map_type.expressions) > 1:
                # Extract the value type from MAP(key_type, value_type)
                value_type = map_type.expressions[1]
                # Cast the value to match the map's value type to avoid type conflicts
                value = exp.cast(value, value_type)
            # else: polymorphic MAP case - no type parameters available, use the value as-is

        # Create a single-entry map for the new key-value pair
        new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
        new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)

        # Use MAP_CONCAT to merge the original map with the new entry.
        # This automatically handles both the insert and update cases
        result = exp.func("MAP_CONCAT", map_arg, new_entry)

        return self.sql(result)
    def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
        # For GENERATOR, unwrap TABLE() - just emit the Generator (it becomes RANGE)
        if isinstance(expression.this, exp.Generator):
            # Preserve the alias, joins, and other table-level args
            table = exp.Table(
                this=expression.this,
                alias=expression.args.get("alias"),
                joins=expression.args.get("joins"),
            )
            return self.sql(table)

        return super().tablefromrows_sql(expression)
    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct.
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        this = expression.this

        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it, e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        if isinstance(this, exp.First):
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        return self.sql(this)
    def split_sql(self, expression: exp.Split) -> str:
        base_func = exp.func("STR_SPLIT", expression.this, expression.expression)

        case_expr = exp.case().else_(base_func)
        needs_case = False

        if expression.args.get("null_returns_null"):
            case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
            needs_case = True

        if expression.args.get("empty_delimiter_returns_whole"):
            # When the delimiter is an empty string, return the input string as a single array element
            array_with_input = exp.array(expression.this)
            case_expr = case_expr.when(
                expression.expression.eq(exp.Literal.string("")), array_with_input
            )
            needs_case = True

        return self.sql(case_expr if needs_case else base_func)
    def splitpart_sql(self, expression: exp.SplitPart) -> str:
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Handle Snowflake's "index 0 and 1 both return the first element" behavior
            if expression.args.get("part_index_zero_as_one"):
                # Convert 0 to 1 for compatibility
                part_index_arg = exp.Paren(
                    this=exp.case()
                    .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
                    .else_(part_index_arg)
                )

            # Use Anonymous to avoid recursion
            base_func_expr: exp.Expr = exp.Anonymous(
                this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
            )
            needs_case_transform = False
            case_expr = exp.case().else_(base_func_expr)

            if expression.args.get("empty_delimiter_returns_whole"):
                # When the delimiter is an empty string:
                # - Return the whole string if part_index is 1 or -1
                # - Return an empty string otherwise
                empty_case = exp.Paren(
                    this=exp.case()
                    .when(
                        exp.or_(
                            part_index_arg.eq(exp.Literal.number("1")),
                            part_index_arg.eq(exp.Literal.number("-1")),
                        ),
                        string_arg,
                    )
                    .else_(exp.Literal.string(""))
                )

                case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
                needs_case_transform = True

            # The output looks something like this:
            #
            #   CASE
            #     WHEN delimiter = '' THEN
            #       (CASE
            #          WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
            #          ELSE '' END)
            #     ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
            #   END
            return self.sql(case_expr if needs_case_transform else base_func_expr)

        return self.function_fallback_sql(expression)
    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render RESPECT NULLS only for the general-purpose
            # window functions that accept it, e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
            return super().respectnulls_sql(expression)

        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")
    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        null = expression.args.get("null")

        if expression.args.get("null_is_empty"):
            x = exp.to_identifier("x")
            list_transform = exp.Transform(
                this=expression.this.copy(),
                expression=exp.Lambda(
                    this=exp.Coalesce(
                        this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                    ),
                    expressions=[x],
                ),
            )
            array_to_string = exp.ArrayToString(
                this=list_transform, expression=expression.expression
            )
            if expression.args.get("null_delim_is_null"):
                return self.sql(
                    exp.case()
                    .when(expression.expression.copy().is_(exp.null()), exp.null())
                    .else_(array_to_string)
                )
            return self.sql(array_to_string)

        if null:
            x = exp.to_identifier("x")
            return self.sql(
                exp.ArrayToString(
                    this=exp.Transform(
                        this=expression.this,
                        expression=exp.Lambda(
                            this=exp.Coalesce(this=x, expressions=[null]),
                            expressions=[x],
                        ),
                    ),
                    expression=expression.expression,
                )
            )

        return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
    def concatws_sql(self, expression: exp.ConcatWs) -> str:
        # DuckDB-specific: handle binary types using the DPipe (||) operator
        separator = seq_get(expression.expressions, 0)
        args = expression.expressions[1:]

        if any(_is_binary(arg) for arg in [separator, *args]):
            result = args[0]
            for arg in args[1:]:
                result = exp.DPipe(
                    this=exp.DPipe(this=result, expression=separator), expression=arg
                )
            return self.sql(result)

        return super().concatws_sql(expression)
    def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        orig_occ = expression.args.get("occurrence")
        occurrence = orig_occ or exp.Literal.number(1)
        option = expression.args.get("option")
        parameters = expression.args.get("parameters")

        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle the starting position offset
        pos_offset: exp.Expr = exp.Literal.number(0)
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)
            pos_offset = position - exp.Literal.number(1)

        # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
        def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
            lst = exp.Bracket(
                this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
                expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
                offset=1,
            )
            transform = exp.Anonymous(
                this="LIST_TRANSFORM",
                expressions=[
                    lst,
                    exp.Lambda(
                        this=exp.Length(this=exp.to_identifier("x")),
                        expressions=[exp.to_identifier("x")],
                    ),
                ],
            )
            return exp.Coalesce(
                this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
                expressions=[exp.Literal.number(0)],
            )

        # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
        base_pos: exp.Expr = (
            exp.Literal.number(1)
            + sum_lengths("STRING_SPLIT_REGEX", occurrence)
            + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
            + pos_offset
        )

        # option=1: add the match length to get the end position
        if option and option.is_int and option.to_py() == 1:
            match_at_occ = exp.Bracket(
                this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
                expressions=[occurrence],
                offset=1,
            )
            base_pos = base_pos + exp.Coalesce(
                this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
            )

        # NULL checks for all provided arguments.
        # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
        null_args = [
            expression.this,
            expression.expression,
            position,
            orig_occ,
            option,
            parameters,
        ]
        null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

        matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

        return self.sql(
            exp.case()
            .when(exp.or_(*null_checks), exp.Null())
            .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
            .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
            .else_(base_pos)
        )
    @unsupported_args("culture")
    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
        fmt = expression.args.get("format")
        if fmt and fmt.is_int:
            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)
    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS.
        # Note: in Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql
    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If the input is the last day of a month, the result is the last day of the result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior).
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build an interval or to_months expression based on the type.
        # Float/decimal case: round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DType.FLOAT,
                exp.DType.DOUBLE,
                exp.DType.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if the Snowflake flag is set:
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE.
        # To match, for example, Snowflake's ADD_MONTHS behavior (which preserves the input type),
        # we need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ.
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)
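    # Worked example of end-of-month preservation: with the flag set,
    # ADD_MONTHS('2023-01-31'::DATE, 1) emits roughly
    #   CASE WHEN LAST_DAY(d) = d THEN LAST_DAY(d + INTERVAL '1' MONTH)
    #        ELSE d + INTERVAL '1' MONTH END
    # so January 31 lands on 2023-02-28, and the result is cast back to DATE.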
    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        unit = expression.args.get("unit")
        date = expression.this

        week_start = _week_unit_to_dow(unit)
        unit = unit_to_str(expression)

        if week_start:
            result = self.sql(
                _build_week_trunc_expression(date, week_start, preserve_start_day=True)
            )
        else:
            result = self.func("DATE_TRUNC", unit, date)

        if (
            expression.args.get("input_type_preserved")
            and date.is_type(*exp.DataType.TEMPORAL_TYPES)
            and not (is_date_unit(unit) and date.is_type(exp.DType.DATE))
        ):
            return self.sql(exp.Cast(this=result, to=date.type))

        return result
    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this
        date_unit = is_date_unit(unit)

        if date_unit and zone:
            # BigQuery's TIMESTAMP_TRUNC with a timezone truncates in the target timezone and returns UTC.
            # A double AT TIME ZONE is needed for BigQuery compatibility:
            # 1. The first AT TIME ZONE ensures truncation happens in the target timezone
            # 2. The second AT TIME ZONE converts the DATE result back to TIMESTAMPTZ (preserving the time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ):
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DType.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
                date_unit and timestamp.is_type(exp.DType.DATE)
            ):
                return self.sql(exp.Cast(this=result, to=timestamp.type))

        return result
    def trim_sql(self, expression: exp.Trim) -> str:
        expression.this.replace(_cast_to_varchar(expression.this))
        if expression.expression:
            expression.expression.replace(_cast_to_varchar(expression.expression))

        result_sql = super().trim_sql(expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)
def round_sql(self, expression: exp.Round) -> str:
    this = expression.this
    decimals = expression.args.get("decimals")
    truncate = expression.args.get("truncate")

    # DuckDB requires the scale (decimals) argument to be an INT.
    # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
    if decimals is not None and expression.args.get("casts_non_integer_decimals"):
        if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
            decimals = exp.cast(decimals, exp.DType.INT)

    func = "ROUND"
    if truncate:
        # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
        if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
            func = "ROUND_EVEN"
            truncate = None
        # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
        elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
            truncate = None

    return self.func(func, this, decimals, truncate)
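For example (hedged; this assumes the Snowflake reader populates the rounding-mode argument as shown in the branches above):

import sqlglot

print(sqlglot.transpile("SELECT ROUND(x, 2, 'HALF_TO_EVEN')", read="snowflake", write="duckdb")[0])
# Roughly: SELECT ROUND_EVEN(x, 2)

print(sqlglot.transpile("SELECT ROUND(x, 2, 'HALF_AWAY_FROM_ZERO')", read="snowflake", write="duckdb")[0])
# Roughly: SELECT ROUND(x, 2) -- the mode argument is simply dropped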
def strtok_sql(self, expression: exp.Strtok) -> str:
    string_arg = expression.this
    delimiter_arg = expression.args.get("delimiter")
    part_index_arg = expression.args.get("part_index")

    if delimiter_arg and part_index_arg:
        # Escape regex chars and build a character class at runtime using REGEXP_REPLACE
        escaped_delimiter = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                delimiter_arg,
                exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),  # Escape problematic regex chars
                exp.Literal.string(r"\\\1"),  # Replace with the escaped version via a backreference to the captured char
                exp.Literal.string("g"),  # Global flag
            ],
        )
        # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped_delimiter, ']') END
        regex_pattern = (
            exp.case()
            .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string(""))
            .else_(
                exp.func(
                    "CONCAT",
                    exp.Literal.string("["),
                    escaped_delimiter,
                    exp.Literal.string("]"),
                )
            )
        )

        # STRTOK skips empty strings, so we need to filter them out:
        # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
        split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern)
        x = exp.to_identifier("x")
        is_empty = x.eq(exp.Literal.string(""))
        filtered_array = exp.func(
            "LIST_FILTER",
            split_array,
            exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]),
        )
        base_func = exp.Bracket(
            this=filtered_array,
            expressions=[part_index_arg],
            offset=1,
        )

        # Use the template with the built regex pattern
        result = exp.replace_placeholders(
            self.STRTOK_TEMPLATE.copy(),
            string=string_arg,
            delimiter=delimiter_arg,
            part_index=part_index_arg,
            base_func=base_func,
        )

        return self.sql(result)

    return self.function_fallback_sql(expression)
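A usage sketch (STRTOK_TEMPLATE is a class-level template not shown in this excerpt, so the output below is indicative of the core shape only):

import sqlglot

print(sqlglot.transpile("SELECT STRTOK('a.b.c', '.', 2)", read="snowflake", write="duckdb")[0])
# Core shape: LIST_FILTER(REGEXP_SPLIT_TO_ARRAY('a.b.c', <escaped char class>), x -> NOT x = '')[2]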
def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str:
    string_arg = expression.this
    delimiter_arg = expression.args.get("expression") or exp.Literal.string(" ")

    escaped = exp.RegexpReplace(
        this=delimiter_arg.copy(),
        expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
        replacement=exp.Literal.string(r"\\\1"),
        modifiers=exp.Literal.string("g"),
    )
    return self.sql(
        exp.replace_placeholders(
            self.STRTOK_TO_ARRAY_TEMPLATE.copy(),
            string=string_arg,
            delimiter=delimiter_arg,
            escaped=escaped,
        )
    )
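And the array variant, where an omitted delimiter defaults to a single space (again indicative; STRTOK_TO_ARRAY_TEMPLATE is defined elsewhere in the class):

import sqlglot

print(sqlglot.transpile("SELECT STRTOK_TO_ARRAY('a b c')", read="snowflake", write="duckdb")[0])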
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))

    # DuckDB returns integers for APPROX_QUANTILE, so cast to DOUBLE if the expected type is a real type
    if expression.is_type(*exp.DataType.REAL_TYPES):
        result = f"CAST({result} AS DOUBLE)"

    return result
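For instance (hedged; the cast only fires when the node's annotated type is a real type, so we run type annotation first):

import sqlglot
from sqlglot.optimizer.annotate_types import annotate_types

expr = sqlglot.parse_one(
    "SELECT APPROX_PERCENTILE(CAST(x AS DOUBLE), 0.5) FROM t", read="snowflake"
)
print(annotate_types(expr).sql("duckdb"))
# Roughly: SELECT CAST(APPROX_QUANTILE(CAST(x AS DOUBLE), 0.5) AS DOUBLE) FROM t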
def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
    """
    BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
    dividing the input distribution into n equal-sized buckets.

    Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
    does not document the specific algorithm used, so results may differ. DuckDB does not
    support RESPECT NULLS.
    """
    this = expression.this
    if isinstance(this, exp.Distinct):
        # APPROX_QUANTILES requires 2 args and the DISTINCT node grabs both
        if len(this.expressions) < 2:
            self.unsupported("APPROX_QUANTILES requires a bucket count argument")
            return self.function_fallback_sql(expression)
        num_quantiles_expr = this.expressions[1].pop()
    else:
        num_quantiles_expr = expression.expression

    if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    num_quantiles = t.cast(int, num_quantiles_expr.to_py())
    if num_quantiles <= 0:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    quantiles = [
        exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
        for i in range(num_quantiles + 1)
    ]

    return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)))
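Sketch, grounded in the bucket computation above (DuckDB's APPROX_QUANTILE natively accepts a list of quantile fractions):

import sqlglot

print(sqlglot.transpile("SELECT APPROX_QUANTILES(x, 4)", read="bigquery", write="duckdb")[0])
# Roughly: SELECT APPROX_QUANTILE(x, [0, 0.25, 0.5, 0.75, 1])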
def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
    this = expression.this

    if _is_binary(this):
        expression.type = exp.DType.BINARY.into_expr()

    arg = _cast_to_bit(this)

    if isinstance(this, exp.Neg):
        arg = exp.Paren(this=arg)

    expression.set("this", arg)

    result_sql = f"~{self.sql(expression, 'this')}"

    return _gen_with_cast_to_blob(self, expression, result_sql)
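Illustrative only (_is_binary, _cast_to_bit, and _gen_with_cast_to_blob are module-internal helpers, so the exact casts depend on the inferred argument type):

import sqlglot

print(sqlglot.transpile("SELECT BITNOT(5)", read="snowflake", write="duckdb")[0])
# Roughly: SELECT ~5 for integer inputs; binary inputs gain BIT/BLOB casts around the ~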
def uuid_sql(self, expression: exp.Uuid) -> str:
    namespace = expression.this
    name = expression.args.get("name")

    # UUID v5 (namespace + name): emulate using SHA1
    if namespace and name:
        result = exp.replace_placeholders(
            self.UUID_V5_TEMPLATE.copy(),
            namespace=namespace,
            name=name,
        )
        return self.sql(result)

    return super().uuid_sql(expression)
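And the UUID v5 emulation (UUID_V5_TEMPLATE is a class template not shown in this excerpt; Snowflake's two-argument UUID_STRING is the v5 form):

import sqlglot

sql = "SELECT UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# Expands UUID_V5_TEMPLATE (a SHA1-based construction) with the namespace and name;
# the zero-argument form falls back to super().uuid_sql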
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- WINDOW_FUNCS_WITH_NULL_ORDERING
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SUPPORTS_MERGE_WHERE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- INOUT_SEPARATOR
- DIRECTED_JOINS
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- SUPPORTS_NAMED_CTE_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_MODIFY_COLUMN
- SUPPORTS_CHANGE_COLUMN
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- MATCH_AGAINST_TABLE_PREFIX
- DECLARE_DEFAULT_ASSIGNMENT
- UPDATE_STATEMENT_SUPPORTS_FROM
- STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- triggerproperties_sql
- triggerreferencing_sql
- triggerevent_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_param_bound_limiter
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- uuidproperty_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- moduleproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- convert_concat_args
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwiseor_sql
- bitwiserightshift_sql
- cast_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- modifycolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- dropprimarykey_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- generatetext_sql
- generatetable_sql
- generatebool_sql
- generateint_sql
- generatedouble_sql
- mltranslate_sql
- mlforecast_sql
- aiforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- changes_sql
- summarize_sql
- explodinggenerateseries_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- skipjsoncolumn_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- block_sql
- storedprocedure_sql
- ifblock_sql
- whileblock_sql
- execute_sql
- executesql_sql
- altermodifysqlsecurity_sql
- usingproperty_sql
- renameindex_sql