Edit on GitHub

sqlglot.generators.duckdb

   1from __future__ import annotations
   2
   3from decimal import Decimal
   4from itertools import groupby
   5import re
   6import typing as t
   7
   8from sqlglot import exp, generator, transforms
   9
  10from sqlglot.dialects.dialect import (
  11    DATETIME_DELTA,
  12    JSON_EXTRACT_TYPE,
  13    approx_count_distinct_sql,
  14    array_append_sql,
  15    array_compact_sql,
  16    array_concat_sql,
  17    arrow_json_extract_sql,
  18    count_if_to_sum,
  19    date_delta_to_binary_interval_op,
  20    datestrtodate_sql,
  21    encode_decode_sql,
  22    explode_to_unnest_sql,
  23    generate_series_sql,
  24    getbit_sql,
  25    groupconcat_sql,
  26    inline_array_unless_query,
  27    months_between_sql,
  28    no_datetime_sql,
  29    no_comment_column_constraint_sql,
  30    no_make_interval_sql,
  31    no_time_sql,
  32    no_timestamp_sql,
  33    rename_func,
  34    remove_from_array_using_filter,
  35    strposition_sql,
  36    str_to_time_sql,
  37    timestrtotime_sql,
  38    unit_to_str,
  39)
  40from sqlglot.generator import unsupported_args
  41from sqlglot.helper import is_date_unit, seq_get
  42from builtins import type as Type
  43
# Regex to detect time zones in timestamps of the form [+|-]TT[:tt]
# The pattern matches timezone offsets that appear after the time portion
TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?")

# Characters that must be escaped when building regex expressions in INITCAP
REGEX_ESCAPE_REPLACEMENTS = {
    "\\": "\\\\",
    "-": r"\-",
    "^": r"\^",
    "[": r"\[",
    "]": r"\]",
}

# Used in RANDSTR transpilation
RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
RANDSTR_SEED = 123456

# Whitespace control characters that DuckDB must process with `CHR({val})` calls
WS_CONTROL_CHARS_TO_DUCK = {
    "\u000b": 11,
    "\u001c": 28,
    "\u001d": 29,
    "\u001e": 30,
    "\u001f": 31,
}

# Days of week to ISO 8601 day-of-week numbers
# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
WEEK_START_DAY_TO_DOW = {
    "MONDAY": 1,
    "TUESDAY": 2,
    "WEDNESDAY": 3,
    "THURSDAY": 4,
    "FRIDAY": 5,
    "SATURDAY": 6,
    "SUNDAY": 7,
}

# 32768 = 2^15; consumer not visible in this chunk — presumably a bit-position bound
MAX_BIT_POSITION = exp.Literal.number(32768)

# cs/as/ps are Snowflake defaults; DuckDB already behaves the same way, so they are safe to drop.
# Note: "as" is also a reserved keyword in DuckDB, making it impossible to pass through.
_SNOWFLAKE_COLLATION_DEFAULTS = frozenset({"cs", "as", "ps"})
_SNOWFLAKE_COLLATION_UNSUPPORTED = frozenset(
    {"ci", "ai", "upper", "lower", "utf8", "bin", "pi", "fl", "fu", "trim", "ltrim", "rtrim"}
)

# Window functions that support IGNORE/RESPECT NULLS in DuckDB
_IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
    exp.FirstValue,
    exp.Lag,
    exp.LastValue,
    exp.Lead,
    exp.NthValue,
)

# SEQ function constants
_SEQ_BASE: exp.Expr = exp.maybe_parse("(ROW_NUMBER() OVER (ORDER BY 1) - 1)")
_SEQ_RESTRICTED = (exp.Where, exp.Having, exp.AggFunc, exp.Order, exp.Select)
# Maps SEQ expression types to their byte width (suffix indicates bytes: SEQ1=1, SEQ2=2, etc.)
_SEQ_BYTE_WIDTH = {exp.Seq1: 1, exp.Seq2: 2, exp.Seq4: 4, exp.Seq8: 8}

# Template for generating signed and unsigned SEQ values within a specified range
_SEQ_UNSIGNED: exp.Expr = exp.maybe_parse(":base % :max_val")
_SEQ_SIGNED: exp.Expr = exp.maybe_parse(
    "(CASE WHEN :base % :max_val >= :half "
    "THEN :base % :max_val - :max_val "
    "ELSE :base % :max_val END)"
)
 113
 114
 115def _apply_base64_alphabet_replacements(
 116    result: exp.Expr,
 117    alphabet: exp.Expr | None,
 118    reverse: bool = False,
 119) -> exp.Expr:
 120    """
 121    Apply base64 alphabet character replacements.
 122
 123    Base64 alphabet can be 1-3 chars: 1st = index 62 ('+'), 2nd = index 63 ('/'), 3rd = padding ('=').
 124    zip truncates to the shorter string, so 1-char alphabet only replaces '+', 2-char replaces '+/', etc.
 125
 126    Args:
 127        result: The expression to apply replacements to
 128        alphabet: Custom alphabet literal (expected chars for +/=)
 129        reverse: If False, replace default with custom (encode)
 130                 If True, replace custom with default (decode)
 131    """
 132    if isinstance(alphabet, exp.Literal) and alphabet.is_string:
 133        for default_char, new_char in zip("+/=", alphabet.this):
 134            if new_char != default_char:
 135                find, replace = (new_char, default_char) if reverse else (default_char, new_char)
 136                result = exp.Replace(
 137                    this=result,
 138                    expression=exp.Literal.string(find),
 139                    replacement=exp.Literal.string(replace),
 140                )
 141    return result
 142
 143
 144def _base64_decode_sql(self: DuckDBGenerator, expression: exp.Expr, to_string: bool) -> str:
 145    """
 146    Transpile Snowflake BASE64_DECODE_STRING/BINARY to DuckDB.
 147
 148    DuckDB uses FROM_BASE64() which returns BLOB. For string output, wrap with DECODE().
 149    Custom alphabets require REPLACE() calls to convert to standard base64.
 150    """
 151    input_expr = expression.this
 152    alphabet = expression.args.get("alphabet")
 153
 154    # Handle custom alphabet by replacing non-standard chars with standard ones
 155    input_expr = _apply_base64_alphabet_replacements(input_expr, alphabet, reverse=True)
 156
 157    # FROM_BASE64 returns BLOB
 158    input_expr = exp.FromBase64(this=input_expr)
 159
 160    if to_string:
 161        input_expr = exp.Decode(this=input_expr)
 162
 163    return self.sql(input_expr)
 164
 165
 166def _last_day_sql(self: DuckDBGenerator, expression: exp.LastDay) -> str:
 167    """
 168    DuckDB's LAST_DAY only supports finding the last day of a month.
 169    For other date parts (year, quarter, week), we need to implement equivalent logic.
 170    """
 171    date_expr = expression.this
 172    unit = expression.text("unit")
 173
 174    if not unit or unit.upper() == "MONTH":
 175        # Default behavior - use DuckDB's native LAST_DAY
 176        return self.func("LAST_DAY", date_expr)
 177
 178    if unit.upper() == "YEAR":
 179        # Last day of year: December 31st of the same year
 180        year_expr = exp.func("EXTRACT", "YEAR", date_expr)
 181        make_date_expr = exp.func(
 182            "MAKE_DATE", year_expr, exp.Literal.number(12), exp.Literal.number(31)
 183        )
 184        return self.sql(make_date_expr)
 185
 186    if unit.upper() == "QUARTER":
 187        # Last day of quarter
 188        year_expr = exp.func("EXTRACT", "YEAR", date_expr)
 189        quarter_expr = exp.func("EXTRACT", "QUARTER", date_expr)
 190
 191        # Calculate last month of quarter: quarter * 3. Quarter can be 1 to 4
 192        last_month_expr = exp.Mul(this=quarter_expr, expression=exp.Literal.number(3))
 193        first_day_last_month_expr = exp.func(
 194            "MAKE_DATE", year_expr, last_month_expr, exp.Literal.number(1)
 195        )
 196
 197        # Last day of the last month of the quarter
 198        last_day_expr = exp.func("LAST_DAY", first_day_last_month_expr)
 199        return self.sql(last_day_expr)
 200
 201    if unit.upper() == "WEEK":
 202        # DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6
 203        dow = exp.func("EXTRACT", "DAYOFWEEK", date_expr)
 204        # Days to the last day of week: (7 - dayofweek) % 7, assuming the last day of week is Sunday (Snowflake)
 205        # Wrap in parentheses to ensure correct precedence
 206        days_to_sunday_expr = exp.Mod(
 207            this=exp.Paren(this=exp.Sub(this=exp.Literal.number(7), expression=dow)),
 208            expression=exp.Literal.number(7),
 209        )
 210        interval_expr = exp.Interval(this=days_to_sunday_expr, unit=exp.var("DAY"))
 211        add_expr = exp.Add(this=date_expr, expression=interval_expr)
 212        cast_expr = exp.cast(add_expr, exp.DType.DATE)
 213        return self.sql(cast_expr)
 214
 215    self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function")
 216    return self.function_fallback_sql(expression)
 217
 218
 219def _is_nanosecond_unit(unit: exp.Expr | None) -> bool:
 220    return isinstance(unit, (exp.Var, exp.Literal)) and unit.name.upper() == "NANOSECOND"
 221
 222
 223def _handle_nanosecond_diff(
 224    self: DuckDBGenerator,
 225    end_time: exp.Expr,
 226    start_time: exp.Expr,
 227) -> str:
 228    """Generate NANOSECOND diff using EPOCH_NS since DATE_DIFF doesn't support it."""
 229    end_ns = exp.cast(end_time, exp.DType.TIMESTAMP_NS)
 230    start_ns = exp.cast(start_time, exp.DType.TIMESTAMP_NS)
 231
 232    # Build expression tree: EPOCH_NS(end) - EPOCH_NS(start)
 233    return self.sql(
 234        exp.Sub(this=exp.func("EPOCH_NS", end_ns), expression=exp.func("EPOCH_NS", start_ns))
 235    )
 236
 237
def _to_boolean_sql(self: DuckDBGenerator, expression: exp.ToBoolean) -> str:
    """
    Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent.

    DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'.
    We need to handle the 'on'/'off' cases explicitly.

    For TO_BOOLEAN (safe=False): NaN and INF values cause errors. We use DuckDB's native ERROR()
    function to replicate this behavior with a clear error message.

    For TRY_TO_BOOLEAN (safe=True): Use DuckDB's TRY_CAST for conversion, which returns NULL
    for invalid inputs instead of throwing errors.
    """
    arg = expression.this
    is_safe = expression.args.get("safe", False)

    # Shared CASE skeleton: both variants check 'on'/'off' first, then diverge
    # in how the remaining values are cast (order of WHEN branches matters).
    base_case_expr = (
        exp.case()
        .when(
            # Handle 'on' -> TRUE (case insensitive)
            exp.Upper(this=exp.cast(arg, exp.DType.VARCHAR)).eq(exp.Literal.string("ON")),
            exp.true(),
        )
        .when(
            # Handle 'off' -> FALSE (case insensitive)
            exp.Upper(this=exp.cast(arg, exp.DType.VARCHAR)).eq(exp.Literal.string("OFF")),
            exp.false(),
        )
    )

    if is_safe:
        # TRY_TO_BOOLEAN: handle 'on'/'off' and use TRY_CAST for everything else
        case_expr = base_case_expr.else_(exp.func("TRY_CAST", arg, exp.DType.BOOLEAN.into_expr()))
    else:
        # TO_BOOLEAN: handle NaN/INF errors, 'on'/'off', and use regular CAST
        # TRY_CAST yields NULL for non-numeric input, so the NaN/INF probe below
        # only fires for genuinely numeric values
        cast_to_real = exp.func("TRY_CAST", arg, exp.DataType.build(exp.DType.FLOAT))

        # Check for NaN and INF values
        nan_inf_check = exp.Or(
            this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
        )

        # The NaN/INF WHEN is appended after the 'on'/'off' branches so those
        # string values are matched first
        case_expr = base_case_expr.when(
            nan_inf_check,
            exp.func(
                "ERROR",
                exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
            ),
        ).else_(exp.cast(arg, exp.DType.BOOLEAN))

    return self.sql(case_expr)
 289
 290
# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDBGenerator, expression: exp.Date) -> str:
    """Render DATE(x[, zone]) as a CAST to DATE, honoring BigQuery's time zone argument."""
    this = expression.this
    zone = self.sql(expression, "zone")

    if zone:
        # BigQuery considers "this" at UTC, converts it to the specified
        # time zone and then keeps only the DATE part
        # To mimic that, we:
        #   (1) Cast to TIMESTAMP to remove DuckDB's local tz
        #   (2) Apply consecutive AtTimeZone calls for UTC -> zone conversion
        this = exp.cast(this, exp.DType.TIMESTAMP)
        at_utc = exp.AtTimeZone(this=this, zone=exp.Literal.string("UTC"))
        this = exp.AtTimeZone(this=at_utc, zone=zone)

    return self.sql(exp.cast(expression=this, to=exp.DType.DATE))
 307
 308
 309# BigQuery -> DuckDB conversion for the TIME_DIFF function
 310def _timediff_sql(self: DuckDBGenerator, expression: exp.TimeDiff) -> str:
 311    unit = expression.unit
 312
 313    if _is_nanosecond_unit(unit):
 314        return _handle_nanosecond_diff(self, expression.expression, expression.this)
 315
 316    this = exp.cast(expression.this, exp.DType.TIME)
 317    expr = exp.cast(expression.expression, exp.DType.TIME)
 318
 319    # Although the 2 dialects share similar signatures, BQ seems to inverse
 320    # the sign of the result so the start/end time operands are flipped
 321    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
 322
 323
def _date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDBGenerator, DATETIME_DELTA], str]:
    """
    DuckDB override to handle:
    1. NANOSECOND operations (DuckDB doesn't support INTERVAL ... NANOSECOND)
    2. Float/decimal interval values (DuckDB INTERVAL requires integers)

    Args:
        cast: Forwarded to the shared dialect helper.

    Returns:
        A generator callable for DATETIME_DELTA expressions.
    """
    base_impl = date_delta_to_binary_interval_op(cast=cast)

    def _duckdb_date_delta_sql(self: DuckDBGenerator, expression: DATETIME_DELTA) -> str:
        unit = expression.unit
        interval_value = expression.expression

        # Handle NANOSECOND unit (DuckDB doesn't support INTERVAL ... NANOSECOND)
        if _is_nanosecond_unit(unit):
            # Unwrap INTERVAL literals so the raw numeric value can be added directly
            if isinstance(interval_value, exp.Interval):
                interval_value = interval_value.this

            timestamp_ns = exp.cast(expression.this, exp.DType.TIMESTAMP_NS)

            # Add at nanosecond resolution: EPOCH_NS(ts) + value, then re-pack
            return self.sql(
                exp.func(
                    "MAKE_TIMESTAMP_NS",
                    exp.Add(this=exp.func("EPOCH_NS", timestamp_ns), expression=interval_value),
                )
            )

        # Handle float/decimal interval values as duckDB INTERVAL requires integer expressions
        if not interval_value or isinstance(interval_value, exp.Interval):
            return base_impl(self, expression)

        if interval_value.is_type(*exp.DataType.REAL_TYPES):
            # NOTE: mutates the expression in place before delegating
            expression.set("expression", exp.cast(exp.func("ROUND", interval_value), "INT"))

        return base_impl(self, expression)

    return _duckdb_date_delta_sql
 362
 363
def _array_insert_sql(self: DuckDBGenerator, expression: exp.ArrayInsert) -> str:
    """
    Transpile ARRAY_INSERT to DuckDB using LIST_CONCAT and slicing.

    Handles:
    - 0-based and 1-based indexing (normalizes to 0-based for calculations)
    - Negative position conversion (requires array length)
    - NULL propagation (source dialects return NULL, DuckDB creates single-element array)
    - Assumes position is within bounds per user constraint

    Note: All dialects that support ARRAY_INSERT (Snowflake, Spark, Databricks) have
    ARRAY_FUNCS_PROPAGATES_NULLS=True, so we always assume source propagates NULLs.

    Args:
        expression: The ArrayInsert expression to transpile.

    Returns:
        SQL string implementing ARRAY_INSERT behavior.
    """
    this = expression.this
    position = expression.args.get("position")
    element = expression.expression
    element_array = exp.Array(expressions=[element])
    # offset is 1 for 1-based source dialects (e.g. Spark), 0 otherwise
    index_offset = expression.args.get("offset", 0)

    # Only literal integer positions can be transpiled; anything else falls back
    if not position or not position.is_int:
        self.unsupported("ARRAY_INSERT can only be transpiled with a literal position")
        return self.func("ARRAY_INSERT", this, position, element)

    pos_value = position.to_py()

    # Normalize one-based indexing to zero-based for slice calculations
    # Spark (1-based) -> Snowflake (0-based):
    #   Positive: pos=1 -> pos=0 (subtract 1)
    #   Negative: pos=-2 -> pos=-1 (add 1)
    # Example: Spark array_insert([a,b,c], -2, d) -> [a,b,d,c] is same as Snowflake pos=-1
    if pos_value > 0:
        pos_value = pos_value - index_offset
    elif pos_value < 0:
        pos_value = pos_value + index_offset

    # Build the appropriate list_concat expression based on position
    if pos_value == 0:
        # insert at beginning
        concat_exprs = [element_array, this]
    elif pos_value > 0:
        # Positive position: LIST_CONCAT(arr[1:pos], [elem], arr[pos+1:])
        # 0-based -> DuckDB 1-based slicing

        # left slice: arr[1:pos]
        slice_start = exp.Bracket(
            this=this,
            expressions=[
                exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
            ],
        )

        # right slice: arr[pos+1:]
        slice_end = exp.Bracket(
            this=this, expressions=[exp.Slice(this=exp.Literal.number(pos_value + 1))]
        )

        concat_exprs = [slice_start, element_array, slice_end]
    else:
        # Negative position: arr[1:LEN(arr)+pos], [elem], arr[LEN(arr)+pos+1:]
        # pos=-1 means insert before last element
        arr_len = exp.Length(this=this)

        # Calculate slice position: LEN(arr) + pos (e.g., LEN(arr) + (-1) = LEN(arr) - 1)
        slice_end_pos = arr_len + exp.Literal.number(pos_value)
        slice_start_pos = slice_end_pos + exp.Literal.number(1)

        # left slice: arr[1:LEN(arr)+pos]
        slice_start = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
        )

        # right slice: arr[LEN(arr)+pos+1:]
        slice_end = exp.Bracket(this=this, expressions=[exp.Slice(this=slice_start_pos)])

        concat_exprs = [slice_start, element_array, slice_end]

    # All dialects that support ARRAY_INSERT propagate NULLs (Snowflake/Spark/Databricks)
    # Wrap in CASE WHEN array IS NULL THEN NULL ELSE func_expr END
    # NOTE(review): `false` receives a pre-rendered SQL string from self.func;
    # this relies on the generator emitting non-Expression args verbatim
    return self.sql(
        exp.If(
            this=exp.Is(this=this, expression=exp.Null()),
            true=exp.Null(),
            false=self.func("LIST_CONCAT", *concat_exprs),
        )
    )
 456
 457
def _array_remove_at_sql(self: DuckDBGenerator, expression: exp.ArrayRemoveAt) -> str:
    """
    Transpile ARRAY_REMOVE_AT to DuckDB using LIST_CONCAT and slicing.

    Handles:
    - Positive positions (0-based indexing)
    - Negative positions (from end of array)
    - NULL propagation (Snowflake returns NULL for NULL array, DuckDB doesn't auto-propagate)
    - Only supports literal integer positions (non-literals remain untranspiled)

    Transpilation patterns:
    - pos=0 (first): arr[2:]
    - pos>0 (middle): LIST_CONCAT(arr[1:p], arr[p+2:])
    - pos=-1 (last): arr[1:LEN(arr)-1]
    - pos<-1: LIST_CONCAT(arr[1:LEN(arr)+p], arr[LEN(arr)+p+2:])

    All wrapped in: CASE WHEN arr IS NULL THEN NULL ELSE ... END

    Args:
        expression: The ArrayRemoveAt expression to transpile.

    Returns:
        SQL string implementing ARRAY_REMOVE_AT behavior.
    """
    this = expression.this
    position = expression.args.get("position")

    # Only literal integer positions can be transpiled; anything else falls back
    if not position or not position.is_int:
        self.unsupported("ARRAY_REMOVE_AT can only be transpiled with a literal position")
        return self.func("ARRAY_REMOVE_AT", this, position)

    pos_value = position.to_py()

    # Build the appropriate expression based on position
    # result_expr may be either an Expression or a pre-rendered SQL string;
    # both are accepted as exp.If's `false` arg below
    if pos_value == 0:
        # Remove first element: arr[2:]
        result_expr: exp.Expr | str = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(2))],
        )
    elif pos_value > 0:
        # Remove at positive position: LIST_CONCAT(arr[1:pos], arr[pos+2:])
        # DuckDB uses 1-based slicing
        left_slice = exp.Bracket(
            this=this,
            expressions=[
                exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
            ],
        )
        right_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(pos_value + 2))],
        )
        result_expr = self.func("LIST_CONCAT", left_slice, right_slice)
    elif pos_value == -1:
        # Remove last element: arr[1:LEN(arr)-1]
        # Optimization: simpler than general negative case
        arr_len = exp.Length(this=this)
        slice_end = arr_len + exp.Literal.number(-1)
        result_expr = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end)],
        )
    else:
        # Remove at negative position: LIST_CONCAT(arr[1:LEN(arr)+pos], arr[LEN(arr)+pos+2:])
        arr_len = exp.Length(this=this)
        slice_end_pos = arr_len + exp.Literal.number(pos_value)
        slice_start_pos = slice_end_pos + exp.Literal.number(2)

        left_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
        )
        right_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=slice_start_pos)],
        )
        result_expr = self.func("LIST_CONCAT", left_slice, right_slice)

    # Snowflake ARRAY_FUNCS_PROPAGATES_NULLS=True, so wrap in NULL check
    # CASE WHEN array IS NULL THEN NULL ELSE result_expr END
    return self.sql(
        exp.If(
            this=exp.Is(this=this, expression=exp.Null()),
            true=exp.Null(),
            false=result_expr,
        )
    )
 546
 547
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDBGenerator, expression: exp.ArraySort) -> str:
    """Render ARRAY_SORT, dropping the comparator argument (flagged via unsupported_args)."""
    return self.func("ARRAY_SORT", expression.this)
 551
 552
 553def _array_contains_sql(self: DuckDBGenerator, expression: exp.ArrayContains) -> str:
 554    this = expression.this
 555    expr = expression.expression
 556
 557    func = self.func("ARRAY_CONTAINS", this, expr)
 558
 559    if expression.args.get("check_null"):
 560        check_null_in_array = exp.Nullif(
 561            this=exp.NEQ(this=exp.ArraySize(this=this), expression=exp.func("LIST_COUNT", this)),
 562            expression=exp.false(),
 563        )
 564        return self.sql(exp.If(this=expr.is_(exp.Null()), true=check_null_in_array, false=func))
 565
 566    return func
 567
 568
def _array_overlaps_sql(self: DuckDBGenerator, expression: exp.ArrayOverlaps) -> str:
    """
    Translates Snowflake's NULL-safe ARRAYS_OVERLAP to DuckDB.

    DuckDB's native && operator is not NULL-safe: [1,NULL,3] && [NULL,4,5] returns FALSE.
    Snowflake returns TRUE when both arrays contain NULL (NULLs are treated as known values).

    Generated SQL: (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))

    ARRAY_LENGTH counts all elements (including NULLs); LIST_COUNT counts only non-NULLs.
    When they differ, the array contains at least one NULL, matching Snowflake's NULL-safe semantics.
    """
    if not expression.args.get("null_safe"):
        return self.binary(expression, "&&")

    arr1 = expression.this
    arr2 = expression.expression

    # Each operand appears multiple times in the rewritten tree, so copies are
    # used throughout to avoid sharing the same AST nodes between parents
    check_nulls = exp.and_(
        exp.NEQ(
            this=exp.ArraySize(this=arr1.copy()),
            expression=exp.func("LIST_COUNT", arr1.copy()),
        ),
        exp.NEQ(
            this=exp.ArraySize(this=arr2.copy()),
            expression=exp.func("LIST_COUNT", arr2.copy()),
        ),
        copy=False,
    )

    # A fresh ArrayOverlaps (without null_safe) renders as the plain && operator
    overlap = exp.ArrayOverlaps(this=arr1.copy(), expression=arr2.copy())

    return self.sql(
        exp.or_(
            exp.paren(overlap, copy=False),
            exp.paren(check_nulls, copy=False),
            copy=False,
            wrap=False,
        )
    )
 609
 610
def _struct_sql(self: DuckDBGenerator, expression: exp.Struct) -> str:
    """Render STRUCT literals as DuckDB `{key: value}` syntax, or ROW(...) for BigQuery inline structs."""
    # Only a Cast ancestor found before any enclosing Select is relevant
    ancestor_cast = expression.find_ancestor(exp.Cast, exp.Select)
    ancestor_cast = None if isinstance(ancestor_cast, exp.Select) else ancestor_cast

    # Empty struct cast works with MAP() since DuckDB can't parse {}
    if not expression.expressions:
        if isinstance(ancestor_cast, exp.Cast) and ancestor_cast.to.is_type(exp.DType.MAP):
            return "MAP()"

    args: list[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DType.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        this = expr.this
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            # ROW(...) form carries values only; field names come from the cast
            args.append(self.sql(value))
        else:
            if isinstance(this, exp.Identifier):
                key = self.sql(exp.Literal.string(expr.name))
            elif is_property_eq:
                key = self.sql(this)
            else:
                # Positional value with no name: synthesize a _<index> key
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"
 656
 657
 658def _datatype_sql(self: DuckDBGenerator, expression: exp.DataType) -> str:
 659    if expression.is_type("array"):
 660        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"
 661
 662    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
 663    if expression.is_type(exp.DType.TIME, exp.DType.TIMETZ, exp.DType.TIMESTAMPTZ):
 664        return expression.this.value
 665
 666    return self.datatype_sql(expression)
 667
 668
 669def _json_format_sql(self: DuckDBGenerator, expression: exp.JSONFormat) -> str:
 670    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
 671    return f"CAST({sql} AS TEXT)"
 672
 673
 674def _build_seq_expression(base: exp.Expr, byte_width: int, signed: bool) -> exp.Expr:
 675    """Build a SEQ expression with the given base, byte width, and signedness."""
 676    bits = byte_width * 8
 677    max_val = exp.Literal.number(2**bits)
 678
 679    if signed:
 680        half = exp.Literal.number(2 ** (bits - 1))
 681        return exp.replace_placeholders(_SEQ_SIGNED.copy(), base=base, max_val=max_val, half=half)
 682    return exp.replace_placeholders(_SEQ_UNSIGNED.copy(), base=base, max_val=max_val)
 683
 684
 685def _seq_to_range_in_generator(expression: exp.Expr) -> exp.Expr:
 686    """
 687    Transform SEQ functions to `range` column references when inside a GENERATOR context.
 688
 689    When GENERATOR(ROWCOUNT => N) becomes RANGE(N) in DuckDB, it produces a column
 690    named `range` with values 0, 1, ..., N-1. SEQ functions produce the same sequence,
 691    so we replace them with `range % max_val` to avoid nested window function issues.
 692    """
 693    if not isinstance(expression, exp.Select):
 694        return expression
 695
 696    from_ = expression.args.get("from_")
 697    if not (
 698        from_
 699        and isinstance(from_.this, exp.TableFromRows)
 700        and isinstance(from_.this.this, exp.Generator)
 701    ):
 702        return expression
 703
 704    def replace_seq(node: exp.Expr) -> exp.Expr:
 705        if isinstance(node, (exp.Seq1, exp.Seq2, exp.Seq4, exp.Seq8)):
 706            byte_width = _SEQ_BYTE_WIDTH[type(node)]
 707            return _build_seq_expression(exp.column("range"), byte_width, signed=node.name == "1")
 708        return node
 709
 710    return expression.transform(replace_seq, copy=False)
 711
 712
def _seq_sql(self: DuckDBGenerator, expression: exp.Func, byte_width: int) -> str:
    """
    Transpile Snowflake SEQ1/SEQ2/SEQ4/SEQ8 to DuckDB.

    Generates monotonically increasing integers starting from 0.
    The signed parameter (0 or 1) affects wrap-around behavior:
    - Unsigned (0): wraps at 2^(bits) - 1
    - Signed (1): wraps at 2^(bits-1) - 1, then goes negative

    Args:
        expression: The SEQ function call; its argument text ("0"/"1") selects signedness.
        byte_width: Width of the sequence type in bytes (1, 2, 4 or 8).
    """
    # Warn if SEQ is in a restricted context (Select stops search at current scope)
    ancestor = expression.find_ancestor(*_SEQ_RESTRICTED)
    if ancestor and (
        (not isinstance(ancestor, (exp.Order, exp.Select)))
        or (isinstance(ancestor, exp.Order) and isinstance(ancestor.parent, exp.Window))
    ):
        self.unsupported("SEQ in restricted context is not supported - use CTE or subquery")

    # expression.name is the text of the sign argument: "1" means signed wrap-around
    result = _build_seq_expression(_SEQ_BASE.copy(), byte_width, signed=expression.name == "1")
    return self.sql(result)
 732
 733
 734def _unix_to_time_sql(self: DuckDBGenerator, expression: exp.UnixToTime) -> str:
 735    scale = expression.args.get("scale")
 736    timestamp = expression.this
 737    target_type = expression.args.get("target_type")
 738
 739    # Check if we need NTZ (naive timestamp in UTC)
 740    is_ntz = target_type and target_type.this in (
 741        exp.DType.TIMESTAMP,
 742        exp.DType.TIMESTAMPNTZ,
 743    )
 744
 745    if scale == exp.UnixToTime.MILLIS:
 746        # EPOCH_MS already returns TIMESTAMP (naive, UTC)
 747        return self.func("EPOCH_MS", timestamp)
 748    if scale == exp.UnixToTime.MICROS:
 749        # MAKE_TIMESTAMP already returns TIMESTAMP (naive, UTC)
 750        return self.func("MAKE_TIMESTAMP", timestamp)
 751
 752    # Other scales: divide and use TO_TIMESTAMP
 753    if scale not in (None, exp.UnixToTime.SECONDS):
 754        timestamp = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))
 755
 756    to_timestamp: exp.Expr = exp.Anonymous(this="TO_TIMESTAMP", expressions=[timestamp])
 757
 758    if is_ntz:
 759        to_timestamp = exp.AtTimeZone(this=to_timestamp, zone=exp.Literal.string("UTC"))
 760
 761    return self.sql(to_timestamp)
 762
 763
# Parent node types under which an arrow-style JSON extract must be parenthesized
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In, exp.Not)
 765
 766
 767def _arrow_json_extract_sql(self: DuckDBGenerator, expression: JSON_EXTRACT_TYPE) -> str:
 768    arrow_sql = arrow_json_extract_sql(self, expression)
 769    if not expression.same_parent and isinstance(
 770        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
 771    ):
 772        arrow_sql = self.wrap(arrow_sql)
 773    return arrow_sql
 774
 775
 776def _implicit_datetime_cast(
 777    arg: exp.Expr | None, type: exp.DType = exp.DType.DATE
 778) -> exp.Expr | None:
 779    if isinstance(arg, exp.Literal) and arg.is_string:
 780        ts = arg.name
 781        if type == exp.DType.DATE and ":" in ts:
 782            type = exp.DType.TIMESTAMPTZ if TIMEZONE_PATTERN.search(ts) else exp.DType.TIMESTAMP
 783
 784        arg = exp.cast(arg, type)
 785
 786    return arg
 787
 788
 789def _week_unit_to_dow(unit: exp.Expr | None) -> int | None:
 790    """
 791    Compute the Monday-based day shift to align DATE_DIFF('WEEK', ...) coming
 792    from other dialects, e.g BigQuery's WEEK(<day>) or ISOWEEK unit parts.
 793
 794    Args:
 795        unit: The unit expression (Var for ISOWEEK or WeekStart)
 796
 797    Returns:
 798        The ISO 8601 day number (Monday=1, Sunday=7 etc) or None if not a week unit or if day is dynamic (not a constant).
 799
 800        Examples:
 801            "WEEK(SUNDAY)" -> 7
 802            "WEEK(MONDAY)" -> 1
 803            "ISOWEEK" -> 1
 804    """
 805    # Handle plain Var expressions for ISOWEEK only
 806    if isinstance(unit, exp.Var) and unit.name.upper() in "ISOWEEK":
 807        return 1
 808
 809    # Handle WeekStart expressions with explicit day
 810    if isinstance(unit, exp.WeekStart):
 811        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())
 812
 813    return None
 814
 815
 816def _build_week_trunc_expression(
 817    date_expr: exp.Expr,
 818    start_dow: int,
 819    preserve_start_day: bool = False,
 820) -> exp.Expr:
 821    """
 822    Build DATE_TRUNC expression for week boundaries with custom start day.
 823
 824    DuckDB's DATE_TRUNC('WEEK', ...) always returns Monday. To align to a different
 825    start day, we shift the date before truncating.
 826
 827    Args:
 828        date_expr: The date expression to truncate.
 829        start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7).
 830        preserve_start_day: If True, reverse the shift after truncating so the result lands on the
 831            correct week start day. Needed for DATE_TRUNC (absolute result matters) but
 832            not for DATE_DIFF (only relative alignment matters).
 833
 834    Shift formula: Sunday (7) gets +1, others get (1 - start_dow).
 835    """
 836    shift_days = 1 if start_dow == 7 else 1 - start_dow
 837    truncated = exp.func("DATE_TRUNC", unit=exp.var("WEEK"), this=date_expr)
 838
 839    if shift_days == 0:
 840        return truncated
 841
 842    shift = exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY"))
 843    shifted_date = exp.DateAdd(this=date_expr, expression=shift)
 844    truncated.set("this", shifted_date)
 845
 846    if preserve_start_day:
 847        interval = exp.Interval(this=exp.Literal.string(str(-shift_days)), unit=exp.var("DAY"))
 848        return exp.cast(
 849            exp.DateAdd(this=truncated, expression=interval), to=exp.DType.DATE, copy=False
 850        )
 851
 852    return truncated
 853
 854
 855def _date_diff_sql(self: DuckDBGenerator, expression: exp.DateDiff | exp.DatetimeDiff) -> str:
 856    unit = expression.unit
 857
 858    if _is_nanosecond_unit(unit):
 859        return _handle_nanosecond_diff(self, expression.this, expression.expression)
 860
 861    this = _implicit_datetime_cast(expression.this)
 862    expr = _implicit_datetime_cast(expression.expression)
 863
 864    # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
 865    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
 866    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
 867    # Whereas for other units such as MONTH it does respect month boundaries:
 868    #  SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
 869    date_part_boundary = expression.args.get("date_part_boundary")
 870
 871    # Extract week start day; returns None if day is dynamic (column/placeholder)
 872    week_start = _week_unit_to_dow(unit)
 873    if date_part_boundary and week_start and this and expr:
 874        expression.set("unit", exp.Literal.string("WEEK"))
 875
 876        # Truncate both dates to week boundaries to respect input dialect semantics
 877        this = _build_week_trunc_expression(this, week_start)
 878        expr = _build_week_trunc_expression(expr, week_start)
 879
 880    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
 881
 882
 883def _generate_datetime_array_sql(
 884    self: DuckDBGenerator, expression: exp.GenerateDateArray | exp.GenerateTimestampArray
 885) -> str:
 886    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)
 887
 888    type = exp.DType.DATE if is_generate_date_array else exp.DType.TIMESTAMP
 889    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
 890    end = _implicit_datetime_cast(expression.args.get("end"), type=type)
 891
 892    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
 893    gen_series: exp.GenerateSeries | exp.Cast = exp.GenerateSeries(
 894        start=start, end=end, step=expression.args.get("step")
 895    )
 896
 897    if is_generate_date_array:
 898        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
 899        # GENERATE_DATE_ARRAY we must cast it back to DATE array
 900        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))
 901
 902    return self.sql(gen_series)
 903
 904
 905def _json_extract_value_array_sql(
 906    self: DuckDBGenerator, expression: exp.JSONValueArray | exp.JSONExtractArray
 907) -> str:
 908    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
 909    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
 910    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
 911
 912
 913def _cast_to_varchar(arg: exp.Expr | None) -> exp.Expr | None:
 914    if arg and arg.type and not arg.is_type(*exp.DataType.TEXT_TYPES, exp.DType.UNKNOWN):
 915        return exp.cast(arg, exp.DType.VARCHAR)
 916    return arg
 917
 918
 919def _cast_to_boolean(arg: exp.Expr | None) -> exp.Expr | None:
 920    if arg and not arg.is_type(exp.DType.BOOLEAN):
 921        return exp.cast(arg, exp.DType.BOOLEAN)
 922    return arg
 923
 924
 925def _is_binary(arg: exp.Expr) -> bool:
 926    return arg.is_type(
 927        exp.DType.BINARY,
 928        exp.DType.VARBINARY,
 929        exp.DType.BLOB,
 930    )
 931
 932
 933def _gen_with_cast_to_blob(self: DuckDBGenerator, expression: exp.Expr, result_sql: str) -> str:
 934    if _is_binary(expression):
 935        blob = exp.DataType.build("BLOB", dialect="duckdb")
 936        result_sql = self.sql(exp.Cast(this=result_sql, to=blob))
 937    return result_sql
 938
 939
 940def _cast_to_bit(arg: exp.Expr) -> exp.Expr:
 941    if not _is_binary(arg):
 942        return arg
 943
 944    if isinstance(arg, exp.HexString):
 945        arg = exp.Unhex(this=exp.Literal.string(arg.this))
 946
 947    return exp.cast(arg, exp.DType.BIT)
 948
 949
 950def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
 951    if _is_binary(expression.this):
 952        expression.set("this", _cast_to_bit(expression.this))
 953    if _is_binary(expression.expression):
 954        expression.set("expression", _cast_to_bit(expression.expression))
 955
 956
def _day_navigation_sql(self: DuckDBGenerator, expression: exp.NextDay | exp.PreviousDay) -> str:
    """
    Transpile Snowflake's NEXT_DAY / PREVIOUS_DAY to DuckDB using date arithmetic.

    Returns the DATE of the next/previous occurrence of the specified weekday.

    Formulas:
    - NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
    - PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1

    Supports both literal and non-literal day names:
    - Literal: Direct lookup (e.g., 'Monday' -> 1)
    - Non-literal: CASE statement for runtime evaluation

    Examples:
        NEXT_DAY('2024-01-01' (Monday), 'Monday')
          -> (1 - 1 + 6) % 7 + 1 = 6 % 7 + 1 = 7 days -> 2024-01-08

        PREVIOUS_DAY('2024-01-15' (Monday), 'Friday')
          -> (1 - 5 + 6) % 7 + 1 = 2 % 7 + 1 = 3 days -> 2024-01-12
    """
    date_expr = expression.this
    day_name_expr = expression.expression

    # Build ISODOW call for current day of week (Monday=1 ... Sunday=7)
    isodow_call = exp.func("ISODOW", date_expr)

    # Determine target day of week
    if isinstance(day_name_expr, exp.Literal):
        # Literal day name: lookup target_dow directly.
        # Prefix matching lets abbreviated inputs (e.g. 'MON') resolve too.
        day_name_str = day_name_expr.name.upper()
        matching_day = next(
            (day for day in WEEK_START_DAY_TO_DOW if day.startswith(day_name_str)), None
        )
        if matching_day:
            target_dow: exp.Expr = exp.Literal.number(WEEK_START_DAY_TO_DOW[matching_day])
        else:
            # Unrecognized day name, use fallback
            return self.function_fallback_sql(expression)
    else:
        # Non-literal day name: build CASE statement for runtime mapping.
        # The two-letter prefixes are unique across English weekday names.
        upper_day_name = exp.Upper(this=day_name_expr)
        target_dow = exp.Case(
            ifs=[
                exp.If(
                    this=exp.func(
                        "STARTS_WITH", upper_day_name.copy(), exp.Literal.string(day[:2])
                    ),
                    true=exp.Literal.number(dow_num),
                )
                for day, dow_num in WEEK_START_DAY_TO_DOW.items()
            ]
        )

    # Calculate days offset and apply interval based on direction
    if isinstance(expression, exp.NextDay):
        # NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
        days_offset = exp.paren(target_dow - isodow_call + 6, copy=False) % 7 + 1
        date_with_offset = date_expr + exp.Interval(this=days_offset, unit=exp.var("DAY"))
    else:  # exp.PreviousDay
        # PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1
        days_offset = exp.paren(isodow_call - target_dow + 6, copy=False) % 7 + 1
        date_with_offset = date_expr - exp.Interval(this=days_offset, unit=exp.var("DAY"))

    # Build final: CAST(date_with_offset AS DATE)
    return self.sql(exp.cast(date_with_offset, exp.DType.DATE))
1023
1024
def _anyvalue_sql(self: DuckDBGenerator, expression: exp.AnyValue) -> str:
    """Map ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL."""
    inner = expression.this
    if not isinstance(inner, exp.HavingMax):
        return self.function_fallback_sql(expression)
    func_name = "ARG_MAX_NULL" if inner.args.get("max") else "ARG_MIN_NULL"
    return self.func(func_name, inner.this, inner.expression)
1032
1033
def _bitwise_agg_sql(
    self: DuckDBGenerator,
    expression: exp.BitwiseOrAgg | exp.BitwiseAndAgg | exp.BitwiseXorAgg,
) -> str:
    """
    DuckDB's bitwise aggregates only accept integer inputs, so other types are
    coerced first:
    - DECIMAL/STRING: plain CAST(arg AS INT), rounding to the nearest integer
    - FLOAT/DOUBLE: ROUND(arg)::INT, since float precision loss makes a direct cast unsafe
    """
    if isinstance(expression, exp.BitwiseAndAgg):
        duck_func = "BIT_AND"
    elif isinstance(expression, exp.BitwiseXorAgg):
        duck_func = "BIT_XOR"
    else:  # exp.BitwiseOrAgg
        duck_func = "BIT_OR"

    arg = expression.this

    if not arg.type:
        # Lazily annotate so the coercion logic below can inspect the type.
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.REAL_TYPES, *exp.DataType.TEXT_TYPES):
        if arg.is_type(*exp.DataType.FLOAT_TYPES):
            # Float types need to be rounded first due to precision loss.
            arg = exp.func("ROUND", arg)
        arg = exp.cast(arg, exp.DType.INT)

    return self.func(duck_func, arg)
1065
1066
def _literal_sql_with_ws_chr(self: DuckDBGenerator, literal: str) -> str:
    """
    Render a string literal, replacing whitespace control characters with CHR()
    calls, since DuckDB does not support \\uXXXX escapes.
    """
    if all(ch not in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
        return self.sql(exp.Literal.string(literal))

    pieces: list[str] = []
    for is_control, run in groupby(literal, key=WS_CONTROL_CHARS_TO_DUCK.__contains__):
        if is_control:
            pieces.extend(
                self.func("CHR", exp.Literal.number(str(WS_CONTROL_CHARS_TO_DUCK[ch])))
                for ch in run
            )
        else:
            pieces.append(self.sql(exp.Literal.string("".join(run))))

    joined = " || ".join(pieces)
    # Parenthesize concatenations so they compose safely inside larger expressions.
    return joined if len(pieces) == 1 else f"({joined})"
1083
1084
def _escape_regex_metachars(
    self: DuckDBGenerator, delimiters: exp.Expr | None, delimiters_sql: str
) -> str:
    r"""
    Escapes the regex metacharacters \ - ^ [ ] so the delimiters can be embedded
    inside a character class.

    String literals are escaped at transpile time; dynamic expressions are
    wrapped in nested runtime REPLACE() calls instead.
    """
    if not delimiters:
        return delimiters_sql

    if delimiters.is_string:
        escaped = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in delimiters.this)
        return _literal_sql_with_ws_chr(self, escaped)

    result = delimiters_sql
    # Dict order matters: the backslash entry comes first, so later replacements
    # do not double-escape it.
    for raw, replacement in REGEX_ESCAPE_REPLACEMENTS.items():
        result = self.func(
            "REPLACE",
            result,
            self.sql(exp.Literal.string(raw)),
            self.sql(exp.Literal.string(replacement)),
        )

    return result
1111
1112
def _build_capitalization_sql(
    self: DuckDBGenerator,
    value_to_split: str,
    delimiters_sql: str,
) -> str:
    """Build DuckDB SQL that upper-cases the first letter of each word segment.

    Args:
        value_to_split: SQL text of the value whose words are capitalized.
        delimiters_sql: SQL text evaluating to the delimiter characters,
            expected to already be escaped for use inside a regex character class.
    """
    # empty string delimiter --> treat value as one word, no need to split
    if delimiters_sql == "''":
        return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"

    # [delims] matches one delimiter char; the split regex captures alternating
    # runs of delimiter and non-delimiter characters.
    delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
    split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"

    # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
    # We do not know whether the first segment is a delimiter or not, so we check the first character of the string
    # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
    return self.func(
        "ARRAY_TO_STRING",
        exp.case()
        .when(
            f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {delim_regex_sql})",
            self.func(
                "LIST_TRANSFORM",
                self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
                "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
            ),
        )
        .else_(
            self.func(
                "LIST_TRANSFORM",
                self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
                "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
            ),
        ),
        "''",
    )
1148
1149
def _initcap_sql(self: DuckDBGenerator, expression: exp.Initcap) -> str:
    """Render INITCAP with configurable word-delimiter characters."""
    value_sql = self.sql(expression, "this")

    delimiters = expression.args.get("expression")
    if delimiters is None:
        # Manually constructed exp.Initcap nodes may omit the delimiters arg.
        delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)

    escaped_sql = _escape_regex_metachars(self, delimiters, self.sql(delimiters))
    return _build_capitalization_sql(self, value_sql, escaped_sql)
1161
1162
def _boolxor_agg_sql(self: DuckDBGenerator, expression: exp.BoolxorAgg) -> str:
    """
    Emulate Snowflake's BOOLXOR_AGG(col) — TRUE iff exactly one input is TRUE —
    as COUNT_IF(col) = 1, since DuckDB has no direct equivalent.

    DuckDB's COUNT_IF strictly requires boolean inputs, so cast when needed.
    """
    count_true = exp.CountIf(this=_cast_to_boolean(expression.this))
    return self.sql(exp.EQ(this=count_true, expression=exp.Literal.number(1)))
1176
1177
def _bitshift_sql(
    self: DuckDBGenerator, expression: exp.BitwiseLeftShift | exp.BitwiseRightShift
) -> str:
    """
    Render bitwise shifts, injecting BIT/INT128 casts where DuckDB needs them.

    DuckDB's shift operators don't work on BLOB/BINARY types, so such operands
    are cast to BIT for the shift and the result is cast back to BLOB.

    Note: Assumes type annotation has been applied with the source dialect.
    """
    op = "<<" if isinstance(expression, exp.BitwiseLeftShift) else ">>"
    operand = expression.this

    cast_back_to_blob = _is_binary(operand)
    if cast_back_to_blob:
        expression.set("this", exp.cast(operand, exp.DType.BIT))
    elif expression.args.get("requires_int128"):
        operand.replace(exp.cast(operand, exp.DType.INT128))

    result = self.binary(expression, op)

    # DuckDB parses `a << b | c << d` as `(a << b | c) << d`, so parenthesize
    # whenever the parent is another binary operator.
    if isinstance(expression.parent, exp.Binary):
        result = self.sql(exp.Paren(this=result))

    if cast_back_to_blob:
        result = self.sql(
            exp.Cast(this=result, to=exp.DataType.build("BLOB", dialect="duckdb"))
        )

    return result
1212
1213
def _scale_rounding_sql(
    self: DuckDBGenerator,
    expression: exp.Expr,
    rounding_func: t.Type[exp.Expr],
) -> str | None:
    """
    Handle scale parameter transformation for rounding functions.

    DuckDB doesn't support the scale parameter for some functions (e.g. FLOOR,
    CEIL), so the call is rewritten: FUNC(x, n) -> ROUND(FUNC(x * 10^n) / 10^n, n)

    Args:
        self: The DuckDB generator instance.
        expression: Expression carrying 'this', 'decimals' and 'to' args.
        rounding_func: The rounding expression class applied to the scaled value.

    Returns:
        The rewritten SQL string when a decimals argument is present, else None.
    """
    decimals = expression.args.get("decimals")
    if decimals is None or expression.args.get("to") is not None:
        return None

    value = expression.this
    if isinstance(value, exp.Binary):
        # Parenthesize so the multiplication below binds correctly.
        value = exp.Paren(this=value)

    scale: exp.Expr = decimals
    if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
        scale = exp.cast(decimals, exp.DType.INT)

    factor = exp.Pow(this=exp.Literal.number("10"), expression=scale)
    scaled = rounding_func(this=exp.Mul(this=value, expression=factor))
    rescaled = exp.Div(this=scaled, expression=factor.copy())

    return self.round_sql(
        exp.Round(this=rescaled, decimals=decimals, casts_non_integer_decimals=True)
    )
1253
1254
def _ceil_floor(self: DuckDBGenerator, expression: exp.Floor | exp.Ceil) -> str:
    """Render CEIL/FLOOR, emulating the scale parameter when one is present."""
    scaled = _scale_rounding_sql(self, expression, type(expression))
    return scaled if scaled is not None else self.ceil_floor(expression)
1260
1261
def _regr_val_sql(
    self: DuckDBGenerator,
    expression: exp.RegrValx | exp.RegrValy,
) -> str:
    """
    Transpile Snowflake's REGR_VALX/REGR_VALY to a DuckDB IF expression.

    REGR_VALX(y, x) returns NULL if y is NULL; otherwise returns x.
    REGR_VALY(y, x) returns NULL if x is NULL; otherwise returns y.
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    # Pick which argument is NULL-checked and which is returned.
    if isinstance(expression, exp.RegrValx):
        null_probe = expression.this  # y
        returned = expression.expression  # x
        returned_attr = "expression"
    else:
        null_probe = expression.expression  # x
        returned = expression.this  # y
        returned_attr = "this"

    result_type = returned.type

    # Best-effort type inference when no type info is available; failures fall
    # through to the DOUBLE default below.
    if not result_type or result_type.this == exp.DType.UNKNOWN:
        try:
            annotated = annotate_types(expression.copy(), dialect=self.dialect)
            result_type = getattr(annotated, returned_attr).type
        except Exception:
            pass

    # Default to DOUBLE for regression functions if the type is still unknown.
    if not result_type or result_type.this == exp.DType.UNKNOWN:
        result_type = exp.DType.DOUBLE.into_expr()

    # Cast NULL to the return type to avoid DuckDB type-inference issues on IF.
    typed_null = exp.Cast(this=exp.Null(), to=result_type)

    return self.sql(
        exp.If(
            this=exp.Is(this=null_probe.copy(), expression=exp.Null()),
            true=typed_null,
            false=returned.copy(),
        )
    )
1314
1315
def _maybe_corr_null_to_false(
    expression: exp.Filter | exp.Window | exp.Corr,
) -> exp.Filter | exp.Window | exp.Corr | None:
    """Clear Corr's null_on_zero_variance flag (unwrapping Window/Filter layers);
    return the expression when the flag was set, else None."""
    inner = expression
    while isinstance(inner, (exp.Window, exp.Filter)):
        inner = inner.this

    if isinstance(inner, exp.Corr) and inner.args.get("null_on_zero_variance"):
        inner.set("null_on_zero_variance", False)
        return expression

    return None
1328
1329
def _date_from_parts_sql(self: DuckDBGenerator, expression: exp.DateFromParts) -> str:
    """
    Snowflake's DATE_FROM_PARTS allows out-of-range values for the month and day input.
    E.g., larger values (month=13, day=100), zero-values (month=0, day=0), negative values (month=-13, day=-100).

    DuckDB's MAKE_DATE does not support out-of-range values, but DuckDB's INTERVAL type does.

    We convert to date arithmetic:
    DATE_FROM_PARTS(year, month, day)
    - MAKE_DATE(year, 1, 1) + INTERVAL (month-1) MONTH + INTERVAL (day-1) DAY
    """
    year_expr = expression.args.get("year")
    month_expr = expression.args.get("month")
    day_expr = expression.args.get("day")

    if expression.args.get("allow_overflow"):
        # Anchor at January 1st of the target year, then add the month/day
        # offsets as INTERVALs so out-of-range parts roll over naturally.
        base_date: exp.Expr = exp.func(
            "MAKE_DATE", year_expr, exp.Literal.number(1), exp.Literal.number(1)
        )

        if month_expr:
            base_date = base_date + exp.Interval(this=month_expr - 1, unit=exp.var("MONTH"))

        if day_expr:
            base_date = base_date + exp.Interval(this=day_expr - 1, unit=exp.var("DAY"))

        # Cast ensures the final result is typed as DATE.
        return self.sql(exp.cast(expression=base_date, to=exp.DType.DATE))

    return self.func("MAKE_DATE", year_expr, month_expr, day_expr)
1359
1360
def _round_arg(arg: exp.Expr, round_input: bool | None = None) -> exp.Expr:
    """Optionally wrap the argument in ROUND(arg, 0)."""
    return exp.func("ROUND", arg, exp.Literal.number(0)) if round_input else arg
1365
1366
def _boolnot_sql(self: DuckDBGenerator, expression: exp.Boolnot) -> str:
    """Render BOOLNOT as NOT (arg), rounding the input when requested."""
    rounded = _round_arg(expression.this, expression.args.get("round_input"))
    return self.sql(exp.not_(exp.paren(rounded)))
1370
1371
def _booland_sql(self: DuckDBGenerator, expression: exp.Booland) -> str:
    """Render BOOLAND as ((l) AND (r)), rounding the inputs when requested."""
    do_round = expression.args.get("round_input")
    lhs = exp.paren(_round_arg(expression.this, do_round))
    rhs = exp.paren(_round_arg(expression.expression, do_round))
    return self.sql(exp.paren(exp.and_(lhs, rhs, wrap=False)))
1377
1378
def _boolor_sql(self: DuckDBGenerator, expression: exp.Boolor) -> str:
    """Render BOOLOR as ((l) OR (r)), rounding the inputs when requested."""
    do_round = expression.args.get("round_input")
    lhs = exp.paren(_round_arg(expression.this, do_round))
    rhs = exp.paren(_round_arg(expression.expression, do_round))
    return self.sql(exp.paren(exp.or_(lhs, rhs, wrap=False)))
1384
1385
def _xor_sql(self: DuckDBGenerator, expression: exp.Xor) -> str:
    """Render XOR as (l AND NOT r) OR (NOT l AND r), rounding inputs when requested."""
    do_round = expression.args.get("round_input")
    lhs = _round_arg(expression.this, do_round)
    rhs = _round_arg(expression.expression, do_round)

    only_left = exp.paren(exp.and_(lhs.copy(), exp.paren(rhs.not_()), wrap=False))
    only_right = exp.paren(exp.and_(exp.paren(lhs.not_()), rhs.copy(), wrap=False))
    return self.sql(exp.or_(only_left, only_right, wrap=False))
1397
1398
def _explode_to_unnest_sql(self: DuckDBGenerator, expression: exp.Lateral) -> str:
    """Handle LATERAL VIEW EXPLODE/INLINE conversion to UNNEST for DuckDB."""
    inner = expression.this

    if not isinstance(inner, exp.Inline):
        # Non-INLINE lateral views follow the generic conversion path.
        return explode_to_unnest_sql(self, expression)

    # INLINE becomes CROSS JOIN LATERAL (SELECT UNNEST(..., max_depth => 2)),
    # using DuckDB-style named parameters for the depth argument.
    unnest = exp.Unnest(
        expressions=[
            inner.this,
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)),
        ]
    )
    subquery = exp.Select(expressions=[unnest]).subquery()

    alias = expression.args.get("alias")
    if alias and not alias.this:
        # A table name is required here; synthesize one from the lateral's index.
        alias.set("this", exp.to_identifier(f"_u_{expression.index}"))

    lateral = exp.Lateral(this=subquery, alias=alias)
    return self.sql(exp.Join(this=lateral, kind="CROSS"))
1426
1427
def _sha_sql(
    self: DuckDBGenerator,
    expression: exp.Expr,
    hash_func: str,
    is_binary: bool = False,
) -> str:
    """Render SHA-family hashes, casting incompatible inputs and optionally
    UNHEXing the digest into binary form."""
    arg = expression.this

    if hash_func == "SHA256":
        # Of the SHA2 family, DuckDB only implements the 256-bit digest.
        digest_length = expression.text("length") or "256"
        if digest_length != "256":
            self.unsupported("DuckDB only supports SHA256 hashing algorithm.")

    # Non-text, non-binary inputs of a known type must be stringified first.
    has_known_type = arg.type and arg.type.this != exp.DType.UNKNOWN
    if has_known_type and not arg.is_type(*exp.DataType.TEXT_TYPES) and not _is_binary(arg):
        arg = exp.cast(arg, exp.DType.VARCHAR)

    hashed = self.func(hash_func, arg)
    return self.func("UNHEX", hashed) if is_binary else hashed
1453
1454
1455class DuckDBGenerator(generator.Generator):
1456    PARAMETER_TOKEN = "$"
1457    NAMED_PLACEHOLDER_TOKEN = "$"
1458    JOIN_HINTS = False
1459    TABLE_HINTS = False
1460    QUERY_HINTS = False
1461    LIMIT_FETCH = "LIMIT"
1462    STRUCT_DELIMITER = ("(", ")")
1463    RENAME_TABLE_WITH_DB = False
1464    NVL2_SUPPORTED = False
1465    SEMI_ANTI_JOIN_WITH_SIDE = False
1466    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
1467    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
1468    LAST_DAY_SUPPORTS_DATE_PART = False
1469    JSON_KEY_VALUE_PAIR_SEP = ","
1470    IGNORE_NULLS_IN_FUNC = True
1471    IGNORE_NULLS_BEFORE_ORDER = False
1472    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
1473    SUPPORTS_CREATE_TABLE_LIKE = False
1474    MULTI_ARG_DISTINCT = False
1475    CAN_IMPLEMENT_ARRAY_ANY = True
1476    SUPPORTS_TO_NUMBER = False
1477    SELECT_KINDS: tuple[str, ...] = ()
1478    SUPPORTS_DECODE_CASE = False
1479    SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False
1480
1481    AFTER_HAVING_MODIFIER_TRANSFORMS = generator.AFTER_HAVING_MODIFIER_TRANSFORMS
1482    SUPPORTS_WINDOW_EXCLUDE = True
1483    COPY_HAS_INTO_KEYWORD = False
1484    STAR_EXCEPT = "EXCLUDE"
1485    PAD_FILL_PATTERN_IS_REQUIRED = True
1486    ARRAY_SIZE_DIM_REQUIRED: bool | None = False
1487    NORMALIZE_EXTRACT_DATE_PARTS = True
1488    SUPPORTS_LIKE_QUANTIFIERS = False
1489    SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
1490
1491    TRANSFORMS = {
1492        **generator.Generator.TRANSFORMS,
1493        exp.AnyValue: _anyvalue_sql,
1494        exp.ApproxDistinct: approx_count_distinct_sql,
1495        exp.Boolnot: _boolnot_sql,
1496        exp.Booland: _booland_sql,
1497        exp.Boolor: _boolor_sql,
1498        exp.Array: transforms.preprocess(
1499            [transforms.inherit_struct_field_names],
1500            generator=inline_array_unless_query,
1501        ),
1502        exp.ArrayAppend: array_append_sql("LIST_APPEND"),
1503        exp.ArrayCompact: array_compact_sql,
1504        exp.ArrayConstructCompact: lambda self, e: self.sql(
1505            exp.ArrayCompact(this=exp.Array(expressions=e.expressions))
1506        ),
1507        exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
1508        exp.ArrayContains: _array_contains_sql,
1509        exp.ArrayOverlaps: _array_overlaps_sql,
1510        exp.ArrayFilter: rename_func("LIST_FILTER"),
1511        exp.ArrayInsert: _array_insert_sql,
1512        exp.ArrayPosition: lambda self, e: (
1513            self.sql(
1514                exp.Sub(
1515                    this=exp.ArrayPosition(this=e.this, expression=e.expression),
1516                    expression=exp.Literal.number(1),
1517                )
1518            )
1519            if e.args.get("zero_based")
1520            else self.func("ARRAY_POSITION", e.this, e.expression)
1521        ),
1522        exp.ArrayRemoveAt: _array_remove_at_sql,
1523        exp.ArrayRemove: remove_from_array_using_filter,
1524        exp.ArraySort: _array_sort_sql,
1525        exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
1526        exp.ArraySum: rename_func("LIST_SUM"),
1527        exp.ArrayMax: rename_func("LIST_MAX"),
1528        exp.ArrayMin: rename_func("LIST_MIN"),
1529        exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False),
1530        exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True),
1531        exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
1532        exp.BitwiseAndAgg: _bitwise_agg_sql,
1533        exp.BitwiseCount: rename_func("BIT_COUNT"),
1534        exp.BitwiseLeftShift: _bitshift_sql,
1535        exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
1536        exp.BitwiseOrAgg: _bitwise_agg_sql,
1537        exp.BitwiseRightShift: _bitshift_sql,
1538        exp.BitwiseXorAgg: _bitwise_agg_sql,
1539        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
1540        exp.Corr: lambda self, e: self._corr_sql(e),
1541        exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
1542        exp.CurrentTime: lambda *_: "CURRENT_TIME",
1543        exp.CurrentSchemas: lambda self, e: self.func(
1544            "current_schemas", e.this if e.this else exp.true()
1545        ),
1546        exp.CurrentTimestamp: lambda self, e: (
1547            self.sql(
1548                exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
1549            )
1550            if e.args.get("sysdate")
1551            else "CURRENT_TIMESTAMP"
1552        ),
1553        exp.CurrentVersion: rename_func("version"),
1554        exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"),
1555        exp.DayOfMonth: rename_func("DAYOFMONTH"),
1556        exp.DayOfWeek: rename_func("DAYOFWEEK"),
1557        exp.DayOfWeekIso: rename_func("ISODOW"),
1558        exp.DayOfYear: rename_func("DAYOFYEAR"),
1559        exp.Dayname: lambda self, e: (
1560            self.func("STRFTIME", e.this, exp.Literal.string("%a"))
1561            if e.args.get("abbreviated")
1562            else self.func("DAYNAME", e.this)
1563        ),
1564        exp.Monthname: lambda self, e: (
1565            self.func("STRFTIME", e.this, exp.Literal.string("%b"))
1566            if e.args.get("abbreviated")
1567            else self.func("MONTHNAME", e.this)
1568        ),
1569        exp.DataType: _datatype_sql,
1570        exp.Date: _date_sql,
1571        exp.DateAdd: _date_delta_to_binary_interval_op(),
1572        exp.DateFromParts: _date_from_parts_sql,
1573        exp.DateSub: _date_delta_to_binary_interval_op(),
1574        exp.DateDiff: _date_diff_sql,
1575        exp.DateStrToDate: datestrtodate_sql,
1576        exp.Datetime: no_datetime_sql,
1577        exp.DatetimeDiff: _date_diff_sql,
1578        exp.DatetimeSub: _date_delta_to_binary_interval_op(),
1579        exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
1580        exp.DateToDi: lambda self, e: (
1581            f"CAST(STRFTIME({self.sql(e, 'this')}, {self.dialect.DATEINT_FORMAT}) AS INT)"
1582        ),
1583        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
1584        exp.DiToDate: lambda self, e: (
1585            f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {self.dialect.DATEINT_FORMAT}) AS DATE)"
1586        ),
1587        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
1588        exp.EqualNull: lambda self, e: self.sql(
1589            exp.NullSafeEQ(this=e.this, expression=e.expression)
1590        ),
1591        exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
1592        exp.GenerateDateArray: _generate_datetime_array_sql,
1593        exp.GenerateSeries: generate_series_sql("GENERATE_SERIES", "RANGE"),
1594        exp.GenerateTimestampArray: _generate_datetime_array_sql,
1595        exp.Getbit: getbit_sql,
1596        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
1597        exp.Explode: rename_func("UNNEST"),
1598        exp.IcebergProperty: lambda *_: "",
1599        exp.IntDiv: lambda self, e: self.binary(e, "//"),
1600        exp.IsInf: rename_func("ISINF"),
1601        exp.IsNan: rename_func("ISNAN"),
1602        exp.IsNullValue: lambda self, e: self.sql(
1603            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL"))
1604        ),
1605        exp.IsArray: lambda self, e: self.sql(
1606            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY"))
1607        ),
1608        exp.Ceil: _ceil_floor,
1609        exp.Floor: _ceil_floor,
1610        exp.JSONBExists: rename_func("JSON_EXISTS"),
1611        exp.JSONExtract: _arrow_json_extract_sql,
1612        exp.JSONExtractArray: _json_extract_value_array_sql,
1613        exp.JSONFormat: _json_format_sql,
1614        exp.JSONValueArray: _json_extract_value_array_sql,
1615        exp.Lateral: _explode_to_unnest_sql,
1616        exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)),
1617        exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)),
1618        exp.Select: transforms.preprocess([_seq_to_range_in_generator]),
1619        exp.Seq1: lambda self, e: _seq_sql(self, e, 1),
1620        exp.Seq2: lambda self, e: _seq_sql(self, e, 2),
1621        exp.Seq4: lambda self, e: _seq_sql(self, e, 4),
1622        exp.Seq8: lambda self, e: _seq_sql(self, e, 8),
1623        exp.BoolxorAgg: _boolxor_agg_sql,
1624        exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
1625        exp.Initcap: _initcap_sql,
1626        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
1627        exp.SHA: lambda self, e: _sha_sql(self, e, "SHA1"),
1628        exp.SHA1Digest: lambda self, e: _sha_sql(self, e, "SHA1", is_binary=True),
1629        exp.SHA2: lambda self, e: _sha_sql(self, e, "SHA256"),
1630        exp.SHA2Digest: lambda self, e: _sha_sql(self, e, "SHA256", is_binary=True),
1631        exp.MonthsBetween: months_between_sql,
1632        exp.NextDay: _day_navigation_sql,
1633        exp.PercentileCont: rename_func("QUANTILE_CONT"),
1634        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
1635        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
1636        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
1637        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
1638        exp.PreviousDay: _day_navigation_sql,
1639        exp.RegexpILike: lambda self, e: self.func(
1640            "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
1641        ),
1642        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
1643        exp.RegrValx: _regr_val_sql,
1644        exp.RegrValy: _regr_val_sql,
1645        exp.Return: lambda self, e: self.sql(e, "this"),
1646        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
1647        exp.StrToUnix: lambda self, e: self.func(
1648            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
1649        ),
1650        exp.Struct: _struct_sql,
1651        exp.Transform: rename_func("LIST_TRANSFORM"),
1652        exp.TimeAdd: _date_delta_to_binary_interval_op(),
1653        exp.TimeSub: _date_delta_to_binary_interval_op(),
1654        exp.Time: no_time_sql,
1655        exp.TimeDiff: _timediff_sql,
1656        exp.Timestamp: no_timestamp_sql,
1657        exp.TimestampAdd: _date_delta_to_binary_interval_op(),
1658        exp.TimestampDiff: lambda self, e: self.func(
1659            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
1660        ),
1661        exp.TimestampSub: _date_delta_to_binary_interval_op(),
1662        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)),
1663        exp.TimeStrToTime: timestrtotime_sql,
1664        exp.TimeStrToUnix: lambda self, e: self.func(
1665            "EPOCH", exp.cast(e.this, exp.DType.TIMESTAMP)
1666        ),
1667        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
1668        exp.ToBoolean: _to_boolean_sql,
1669        exp.ToVariant: lambda self, e: self.sql(
1670            exp.cast(e.this, exp.DataType.build("VARIANT", dialect="duckdb"))
1671        ),
1672        exp.TimeToUnix: rename_func("EPOCH"),
1673        exp.TsOrDiToDi: lambda self, e: (
1674            f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)"
1675        ),
1676        exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
1677        exp.TsOrDsDiff: lambda self, e: self.func(
1678            "DATE_DIFF",
1679            f"'{e.args.get('unit') or 'DAY'}'",
1680            exp.cast(e.expression, exp.DType.TIMESTAMP),
1681            exp.cast(e.this, exp.DType.TIMESTAMP),
1682        ),
1683        exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
1684        exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
1685        exp.UnixSeconds: lambda self, e: self.sql(
1686            exp.cast(self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DType.BIGINT)
1687        ),
1688        exp.UnixToStr: lambda self, e: self.func(
1689            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
1690        ),
1691        exp.DatetimeTrunc: lambda self, e: self.func(
1692            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DType.DATETIME)
1693        ),
1694        exp.UnixToTime: _unix_to_time_sql,
1695        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
1696        exp.VariancePop: rename_func("VAR_POP"),
1697        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
1698        exp.YearOfWeek: lambda self, e: self.sql(
1699            exp.Extract(
1700                this=exp.Var(this="ISOYEAR"),
1701                expression=e.this,
1702            )
1703        ),
1704        exp.YearOfWeekIso: lambda self, e: self.sql(
1705            exp.Extract(
1706                this=exp.Var(this="ISOYEAR"),
1707                expression=e.this,
1708            )
1709        ),
1710        exp.Xor: _xor_sql,
1711        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
1712        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
1713        exp.DateBin: rename_func("TIME_BUCKET"),
1714        exp.LastDay: _last_day_sql,
1715    }
1716
    # JSON path node types DuckDB can render; presumably consulted by the base
    # generator when serializing JSON paths — verify against sqlglot.generator.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }
1723
    # Overrides of the base generator's type names for types DuckDB spells
    # differently (e.g. BINARY -> BLOB, all char variants -> TEXT).
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DType.BINARY: "BLOB",
        exp.DType.BPCHAR: "TEXT",
        exp.DType.CHAR: "TEXT",
        exp.DType.DATETIME: "TIMESTAMP",
        exp.DType.DECFLOAT: "DECIMAL(38, 5)",
        exp.DType.FLOAT: "REAL",
        exp.DType.JSONB: "JSON",
        exp.DType.NCHAR: "TEXT",
        exp.DType.NVARCHAR: "TEXT",
        exp.DType.UINT: "UINTEGER",
        exp.DType.VARBINARY: "BLOB",
        exp.DType.ROWVERSION: "BLOB",
        exp.DType.VARCHAR: "TEXT",
        exp.DType.TIMESTAMPLTZ: "TIMESTAMPTZ",
        exp.DType.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DType.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DType.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DType.TIMESTAMP_NS: "TIMESTAMP_NS",
        exp.DType.BIGDECIMAL: "DECIMAL(38, 5)",
    }
1746
    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    # NOTE(review): several entries carry the libpg_query token suffix `_p`
    # (e.g. "in_p", "end_p", "null_p"); as written they can never match a real
    # identifier like "in" — confirm whether this mirrors upstream on purpose.
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }
1827
1828    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
1829
    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION = {
        # Start from "everything unsupported", then re-enable the few
        # properties DuckDB can express via the explicit entries below.
        **{
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        },
        exp.LikeProperty: exp.Properties.Location.POST_SCHEMA,
        exp.TemporaryProperty: exp.Properties.Location.POST_CREATE,
        exp.ReturnsProperty: exp.Properties.Location.POST_ALIAS,
        exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION,
        exp.IcebergProperty: exp.Properties.Location.POST_CREATE,
    }
1845
1846    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: t.ClassVar = _IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS
1847
    # Template for ZIPF transpilation - placeholders get replaced with actual parameters
    # Builds a CDF over weights 1/i^s for i in [1, n] and picks the smallest i
    # whose cumulative probability covers the random draw :random_expr.
    ZIPF_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        WITH rand AS (SELECT :random_expr AS r),
        weights AS (
            SELECT i, 1.0 / POWER(i, :s) AS w
            FROM RANGE(1, :n + 1) AS t(i)
        ),
        cdf AS (
            SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
            FROM weights
        )
        SELECT MIN(i)
        FROM cdf
        WHERE p >= (SELECT r FROM rand)
        """
    )
1865
    # Template for NORMAL transpilation using Box-Muller transform
    # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
    # GREATEST(:u1, 1e-10) guards LN() against a zero uniform draw.
    NORMAL_TEMPLATE: exp.Expr = exp.maybe_parse(
        ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
    )
1871
    # Template for generating a seeded pseudo-random value in [0, 1) from a hash
    # (1e6 buckets, so roughly 6 decimal digits of resolution)
    SEEDED_RANDOM_TEMPLATE: exp.Expr = exp.maybe_parse("(ABS(HASH(:seed)) % 1000000) / 1000000.0")
1874
    # Template for generating signed and unsigned SEQ values within a specified range
    # (the underlying templates are module-level constants defined earlier in this file)
    SEQ_UNSIGNED: exp.Expr = _SEQ_UNSIGNED
    SEQ_SIGNED: exp.Expr = _SEQ_SIGNED
1878
    # Template for MAP_CAT transpilation - Snowflake semantics:
    # 1. Returns NULL if either input is NULL
    # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k]))
    # 3. Filters out entries with NULL values from the result
    # Placeholders :map1/:map2 are substituted with the two map expressions.
    MAPCAT_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL
            ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(
                LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))),
                __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k]))
            ), __x -> __x.value IS NOT NULL))
        END
        """
    )
1894
    # Mappings for EXTRACT/DATE_PART transpilation
    # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes
    # Each value is (strftime code, type to cast the string result to).
    EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {
        "WEEKISO": ("%V", "INTEGER"),
        "YEAROFWEEK": ("%G", "INTEGER"),
        "YEAROFWEEKISO": ("%G", "INTEGER"),
        "NANOSECOND": ("%n", "BIGINT"),
    }
1903
    # Maps epoch-based specifiers to DuckDB epoch functions
    # (EPOCH = seconds, EPOCH_MS/US/NS = milli/micro/nanoseconds)
    EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {
        "EPOCH_SECOND": "EPOCH",
        "EPOCH_MILLISECOND": "EPOCH_MS",
        "EPOCH_MICROSECOND": "EPOCH_US",
        "EPOCH_NANOSECOND": "EPOCH_NS",
    }
1911
    # Template for BITMAP_CONSTRUCT_AGG transpilation
    #
    # BACKGROUND:
    # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap.
    # Supports values in range 0-32767, this version returns NULL if any value is out of range
    # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
    # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
    #
    # Snowflake uses two different formats based on the number of unique values:
    #
    # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
    #   Bytes 0-1: Count of values as 2-byte big-endian integer (e.g., 3 values = 0x0003)
    #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
    #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
    #                                count  v1   v2   v3   pad
    #
    # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
    #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
    #   Bytes 10+: Each value as 2-byte little-endian integer (no padding)
    #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
    #                                  hdr  ----9 zero bytes----  v1   v2   v3   v4   v5
    #
    # TEMPLATE STRUCTURE
    #
    # Phase 1 - Innermost subquery: Data preparation
    #   SELECT LIST_SORT(...) AS l
    #   - Aggregates all input values into a list, remove NULLs, duplicates and sorts
    #   Result: Clean, sorted list of unique non-null integers stored as 'l'
    #
    # Phase 2 - Middle subquery: Hex string construction
    #   LIST_TRANSFORM(...)
    #   - Converts each integer to 2-byte little-endian hex representation
    #   - & 255 extracts low byte, >> 8 extracts high byte
    #   - LIST_REDUCE: Concatenates all hex pairs into single string 'h'
    #   Result: Hex string of all values
    #
    # Phase 3 - Outer SELECT: Final bitmap assembly
    #   LENGTH(l) < 5:
    #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
    #   LENGTH(l) >= 5:
    #   - Large format: Fixed 10-byte header + values (no padding needed)
    #   Result: Complete binary bitmap as BLOB
    #
    # Parsed once at class-creation time; the :arg placeholder is substituted per
    # call site (see bitmapconstructagg_sql).
    BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CASE
            WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
            WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
            WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
            ELSE UNHEX('08000000000000000000' || h)
        END
        FROM (
            SELECT l, COALESCE(LIST_REDUCE(
                LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
                (__a, __b) -> __a || __b, ''
            ), '') AS h
            FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
        )
        """
    )
1972
    # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
    # Note this is an f-string: RANDSTR_CHAR_POOL (62 chars, hence the * 62) is
    # baked into the SQL text at class-creation time; :seed/:length stay as placeholders.
    RANDSTR_TEMPLATE: exp.Expr = exp.maybe_parse(
        f"""
        SELECT LISTAGG(
            SUBSTRING(
                '{RANDSTR_CHAR_POOL}',
                1 + CAST(FLOOR(random_value * 62) AS INT),
                1
            ),
            ''
        )
        FROM (
            SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
            FROM RANGE(:length) AS t(i)
        )
        """,
    )
1990
    # Template for MINHASH transpilation
    # Computes k minimum hash values across aggregated data using DuckDB list functions
    # Returns JSON matching Snowflake format: {"state": [...], "type": "minhash", "version": 1}
    # One hash family per seed in [0, :k); each row's value is salted with the seed.
    MINHASH_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h
            FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed)
        )
        """,
    )
2003
    # Template for MINHASH_COMBINE transpilation
    # Combines multiple minhash signatures by taking element-wise minimum
    # (WITH ORDINALITY supplies the 1-based position used to align signatures)
    MINHASH_COMBINE_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT
                pos AS idx,
                MIN(val) AS min_h
            FROM
                UNNEST(LIST(:expr)) AS _(sig),
                UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos)
            GROUP BY pos
        )
        """,
    )
2020
    # Template for APPROXIMATE_SIMILARITY transpilation
    # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree
    # (num_distinct = 1 at a position means every signature produced the same hash there)
    APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*)
        FROM (
            SELECT pos, COUNT(DISTINCT h) AS num_distinct
            FROM (
                SELECT h, pos
                FROM UNNEST(LIST(:expr)) AS _(sig),
                     UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos)
            )
            GROUP BY pos
        )
        """,
    )
2037
    # Template for ARRAYS_ZIP transpilation
    # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest
    # Uses RANGE + indexing to match Snowflake behavior
    # The :null_check/:all_empty_check/:empty_struct/:max_len/:transform_struct
    # placeholders are built per call from the actual argument list.
    ARRAYS_ZIP_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :null_check THEN NULL
        WHEN :all_empty_check THEN [:empty_struct]
        ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct)
        END
        """,
    )
2049
    # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
    # Each element is paired with its 1-based position via LIST_ZIP, then filtered
    # by a comparison operator (supplied via :cond) that determines the operation:
    #   EXCEPT (>):        keep the N-th occurrence only if N > count in arr2
    #                      e.g. [2,2,2] EXCEPT [2,2] -> [2]
    #   INTERSECTION (<=): keep the N-th occurrence only if N <= count in arr2
    #                      e.g. [2,2,2] INTERSECT [2,2] -> [2,2]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    # Filled in by _array_bag_sql below.
    ARRAY_BAG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_TRANSFORM(
                LIST_FILTER(
                    LIST_ZIP(:arr1, GENERATE_SERIES(1, LEN(:arr1))),
                    pair -> :cond
                ),
                pair -> pair[0]
            )
        END
        """
    )
2072
    # Occurrence-counting predicates plugged into ARRAY_BAG_TEMPLATE's :cond slot.
    # `pair` is (element, 1-based position); :arr1[1:pair[1]] is the prefix up to
    # and including the current element, so LEN(...) is its occurrence index.
    ARRAY_EXCEPT_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " > LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    ARRAY_INTERSECTION_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " <= LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )
2082
    # Set semantics for ARRAY_EXCEPT. Deduplicates arr1 via LIST_DISTINCT, then
    # filters out any element that appears at least once in arr2.
    #   e.g. [1,1,2,3] EXCEPT [1] -> [2,3]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    # Like the bag frame above, NULL in either input yields NULL.
    ARRAY_EXCEPT_SET_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_FILTER(
                LIST_DISTINCT(:arr1),
                e -> LEN(LIST_FILTER(:arr2, x -> x IS NOT DISTINCT FROM e)) = 0
            )
        END
        """
    )
2098
    # Template for STRTOK function transpilation
    #
    # DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
    # We may need to adjust this if we want to support transpilation from other dialects
    #
    # CASE
    #     -- Snowflake: empty delimiter + empty input string -> NULL
    #     WHEN delimiter = '' AND input_str = '' THEN NULL
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return input string if index is 1
    #     WHEN delimiter = '' AND index = 1 THEN input_str
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return NULL if index is not 1
    #     WHEN delimiter = '' THEN NULL
    #
    #     -- Snowflake: negative indices return NULL
    #     WHEN index < 0 THEN NULL
    #
    #     -- Snowflake: return NULL if any argument is NULL
    #     WHEN input_str IS NULL OR delimiter IS NULL OR index IS NULL THEN NULL
    #
    #
    #     ELSE LIST_FILTER(
    #         REGEXP_SPLIT_TO_ARRAY(
    #             input_str,
    #             CASE
    #                 -- if delimiter is '', we don't want to surround it with '[' and ']' as '[]' is invalid for DuckDB
    #                 WHEN delimiter = '' THEN ''
    #
    #                 -- handle problematic regex characters in delimiter with REGEXP_REPLACE
    #                 -- turn delimiter into a regex char set, otherwise DuckDB will match in order, which we don't want
    #                 ELSE '[' || REGEXP_REPLACE(delimiter, problematic_char_set, '\\\1', 'g') || ']'
    #             END
    #         ),
    #
    #         -- Snowflake: don't return empty strings
    #         x -> NOT x = ''
    #     )[index]
    # END
    #
    # :base_func is the LIST_FILTER(...)[index] expression sketched above,
    # built per call with the actual string/delimiter/index arguments.
    STRTOK_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :delimiter = '' AND :string = '' THEN NULL
            WHEN :delimiter = '' AND :part_index = 1 THEN :string
            WHEN :delimiter = '' THEN NULL
            WHEN :part_index < 0 THEN NULL
            WHEN :string IS NULL OR :delimiter IS NULL OR :part_index IS NULL THEN NULL
            ELSE :base_func
        END
        """
    )
2150
2151    def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str:
2152        cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2))
2153        return self.sql(
2154            exp.replace_placeholders(self.ARRAY_BAG_TEMPLATE, arr1=arr1, arr2=arr2, cond=cond)
2155        )
2156
2157    def timeslice_sql(self, expression: exp.TimeSlice) -> str:
2158        """
2159        Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.
2160
2161        Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
2162        DuckDB:    time_bucket(INTERVAL 'slice_length' UNIT, date_expr)
2163
2164        For 'END' kind, add the interval to get the end of the slice.
2165        For DATE type with 'END', cast result back to DATE to preserve type.
2166        """
2167        date_expr = expression.this
2168        slice_length = expression.expression
2169        unit = expression.unit
2170        kind = expression.text("kind").upper()
2171
2172        # Create INTERVAL expression: INTERVAL 'N' UNIT
2173        interval_expr = exp.Interval(this=slice_length, unit=unit)
2174
2175        # Create base time_bucket expression
2176        time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)
2177
2178        # Check if we need the end of the slice (default is start)
2179        if not kind == "END":
2180            # For 'START', return time_bucket directly
2181            return self.sql(time_bucket_expr)
2182
2183        # For 'END', add the interval to get end of slice
2184        add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())
2185
2186        # If input is DATE type, cast result back to DATE to preserve type
2187        # DuckDB converts DATE to TIMESTAMP when adding intervals
2188        if date_expr.is_type(exp.DType.DATE):
2189            return self.sql(exp.cast(add_expr, exp.DType.DATE))
2190
2191        return self.sql(add_expr)
2192
2193    def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
2194        """
2195        Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.
2196
2197        Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
2198        - Each bucket covers 32,768 values
2199        - Bucket numbering starts at 1
2200        - Formula: ((value - 1) // 32768) + 1 for positive values
2201
2202        For non-positive values (0 and negative), we use value // 32768 to avoid
2203        producing bucket 0 or positive bucket IDs for negative inputs.
2204        """
2205        value = expression.this
2206
2207        positive_formula = ((value - 1) // 32768) + 1
2208        non_positive_formula = value // 32768
2209
2210        # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
2211        case_expr = (
2212            exp.case()
2213            .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
2214            .else_(non_positive_formula)
2215        )
2216        return self.sql(case_expr)
2217
2218    def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
2219        """
2220        Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
2221
2222        Snowflake's BITMAP_BIT_POSITION behavior:
2223        - For n <= 0: returns ABS(n) % 32768
2224        - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
2225        """
2226        this = expression.this
2227
2228        return self.sql(
2229            exp.Mod(
2230                this=exp.Paren(
2231                    this=exp.If(
2232                        this=exp.GT(this=this, expression=exp.Literal.number(0)),
2233                        true=this - exp.Literal.number(1),
2234                        false=exp.Abs(this=this),
2235                    )
2236                ),
2237                expression=MAX_BIT_POSITION,
2238            )
2239        )
2240
2241    def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
2242        """
2243        Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
2244        Uses a pre-parsed template with placeholders replaced by expression nodes.
2245
2246        Snowflake bitmap format:
2247        - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
2248        - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
2249        """
2250        arg = expression.this
2251        return (
2252            f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
2253        )
2254
    def compress_sql(self, expression: exp.Compress) -> str:
        # DuckDB has no COMPRESS() equivalent; warn and render the call verbatim.
        self.unsupported("DuckDB does not support the COMPRESS() function")
        return self.function_fallback_sql(expression)
2258
    def encrypt_sql(self, expression: exp.Encrypt) -> str:
        # DuckDB has no ENCRYPT equivalent; warn and render the call verbatim.
        self.unsupported("ENCRYPT is not supported in DuckDB")
        return self.function_fallback_sql(expression)
2262
2263    def decrypt_sql(self, expression: exp.Decrypt) -> str:
2264        func_name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
2265        self.unsupported(f"{func_name} is not supported in DuckDB")
2266        return self.function_fallback_sql(expression)
2267
2268    def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
2269        func_name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
2270        self.unsupported(f"{func_name} is not supported in DuckDB")
2271        return self.function_fallback_sql(expression)
2272
    def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
        # DuckDB has no ENCRYPT_RAW equivalent; warn and render the call verbatim.
        self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
        return self.function_fallback_sql(expression)
2276
    def parseurl_sql(self, expression: exp.ParseUrl) -> str:
        # DuckDB has no PARSE_URL equivalent; warn and render the call verbatim.
        self.unsupported("PARSE_URL is not supported in DuckDB")
        return self.function_fallback_sql(expression)
2280
    def parseip_sql(self, expression: exp.ParseIp) -> str:
        # DuckDB has no PARSE_IP equivalent; warn and render the call verbatim.
        self.unsupported("PARSE_IP is not supported in DuckDB")
        return self.function_fallback_sql(expression)
2284
2285    def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
2286        this = expression.this
2287        expr = expression.expression
2288
2289        if expression.args.get("case_insensitive"):
2290            this = exp.Upper(this=this)
2291            expr = exp.Upper(this=expr)
2292
2293        result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)
2294
2295        if expression.args.get("integer_scale"):
2296            result = exp.cast(result * 100, "INTEGER")
2297
2298        return self.sql(result)
2299
2300    def nthvalue_sql(self, expression: exp.NthValue) -> str:
2301        from_first = expression.args.get("from_first", True)
2302        if not from_first:
2303            self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end ")
2304
2305        return self.function_fallback_sql(expression)
2306
2307    def randstr_sql(self, expression: exp.Randstr) -> str:
2308        """
2309        Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
2310        Uses a pre-parsed template with placeholders replaced by expression nodes.
2311
2312        RANDSTR(length, generator) generates a random string of specified length.
2313        - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
2314        - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
2315        - No generator: Use default seed value
2316        """
2317        length = expression.this
2318        generator = expression.args.get("generator")
2319
2320        if generator:
2321            if isinstance(generator, exp.Rand):
2322                # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
2323                seed_value = generator.this or generator
2324            else:
2325                # Const/int or other expression - use as seed directly
2326                seed_value = generator
2327        else:
2328            # No generator specified, use default seed (arbitrary but deterministic)
2329            seed_value = exp.Literal.number(RANDSTR_SEED)
2330
2331        replacements = {"seed": seed_value, "length": length}
2332        return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
2333
2334    @unsupported_args("finish")
2335    def reduce_sql(self, expression: exp.Reduce) -> str:
2336        array_arg = expression.this
2337        initial_value = expression.args.get("initial")
2338        merge_lambda = expression.args.get("merge")
2339
2340        if merge_lambda:
2341            merge_lambda.set("colon", True)
2342
2343        return self.func("list_reduce", array_arg, merge_lambda, initial_value)
2344
2345    def zipf_sql(self, expression: exp.Zipf) -> str:
2346        """
2347        Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
2348        Uses a pre-parsed template with placeholders replaced by expression nodes.
2349        """
2350        s = expression.this
2351        n = expression.args["elementcount"]
2352        gen = expression.args["gen"]
2353
2354        if not isinstance(gen, exp.Rand):
2355            # (ABS(HASH(seed)) % 1000000) / 1000000.0
2356            random_expr: exp.Expr = exp.Div(
2357                this=exp.Paren(
2358                    this=exp.Mod(
2359                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
2360                        expression=exp.Literal.number(1000000),
2361                    )
2362                ),
2363                expression=exp.Literal.number(1000000.0),
2364            )
2365        else:
2366            # Use RANDOM() for non-deterministic output
2367            random_expr = exp.Rand()
2368
2369        replacements = {"s": s, "n": n, "random_expr": random_expr}
2370        return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
2371
2372    def tobinary_sql(self, expression: exp.ToBinary) -> str:
2373        """
2374        TO_BINARY and TRY_TO_BINARY transpilation:
2375        - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
2376        - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
2377        - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')
2378
2379        For TRY_TO_BINARY (safe=True), wrap with TRY():
2380        - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
2381        """
2382        value = expression.this
2383        format_arg = expression.args.get("format")
2384        is_safe = expression.args.get("safe")
2385        is_binary = _is_binary(expression)
2386
2387        if not format_arg and not is_binary:
2388            func_name = "TRY_TO_BINARY" if is_safe else "TO_BINARY"
2389            return self.func(func_name, value)
2390
2391        # Snowflake defaults to HEX encoding when no format is specified
2392        fmt = format_arg.name.upper() if format_arg else "HEX"
2393
2394        if fmt in ("UTF-8", "UTF8"):
2395            # DuckDB ENCODE always uses UTF-8, no charset parameter needed
2396            result = self.func("ENCODE", value)
2397        elif fmt == "BASE64":
2398            result = self.func("FROM_BASE64", value)
2399        elif fmt == "HEX":
2400            result = self.func("UNHEX", value)
2401        else:
2402            if is_safe:
2403                return self.sql(exp.null())
2404            else:
2405                self.unsupported(f"format {fmt} is not supported")
2406                result = self.func("TO_BINARY", value)
2407        return f"TRY({result})" if is_safe else result
2408
2409    def tonumber_sql(self, expression: exp.ToNumber) -> str:
2410        fmt = expression.args.get("format")
2411        precision = expression.args.get("precision")
2412        scale = expression.args.get("scale")
2413
2414        if not fmt and precision and scale:
2415            return self.sql(
2416                exp.cast(
2417                    expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb"
2418                )
2419            )
2420
2421        return super().tonumber_sql(expression)
2422
2423    def _greatest_least_sql(self, expression: exp.Greatest | exp.Least) -> str:
2424        """
2425        Handle GREATEST/LEAST functions with dialect-aware NULL behavior.
2426
2427        - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
2428        - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
2429        """
2430        # Get all arguments
2431        all_args = [expression.this, *expression.expressions]
2432        fallback_sql = self.function_fallback_sql(expression)
2433
2434        if expression.args.get("ignore_nulls"):
2435            # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
2436            return self.sql(fallback_sql)
2437
2438        # return NULL if any argument is NULL
2439        case_expr = exp.case().when(
2440            exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
2441            exp.null(),
2442            copy=False,
2443        )
2444        case_expr.set("default", fallback_sql)
2445        return self.sql(case_expr)
2446
2447    def generator_sql(self, expression: exp.Generator) -> str:
2448        # Transpile Snowflake GENERATOR to DuckDB range()
2449        rowcount = expression.args.get("rowcount")
2450        time_limit = expression.args.get("time_limit")
2451
2452        if time_limit:
2453            self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")
2454
2455        if not rowcount:
2456            self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
2457            return self.func("range", exp.Literal.number(0))
2458
2459        return self.func("range", rowcount)
2460
    def greatest_sql(self, expression: exp.Greatest) -> str:
        # Delegate to the shared GREATEST/LEAST handler for NULL semantics.
        return self._greatest_least_sql(expression)
2463
    def least_sql(self, expression: exp.Least) -> str:
        # Delegate to the shared GREATEST/LEAST handler for NULL semantics.
        return self._greatest_least_sql(expression)
2466
2467    def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
2468        if expression.args.get("colon"):
2469            prefix = "LAMBDA "
2470            arrow_sep = ":"
2471            wrap = False
2472        else:
2473            prefix = ""
2474
2475        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
2476        return f"{prefix}{lambda_sql}"
2477
2478    def show_sql(self, expression: exp.Show) -> str:
2479        from_ = self.sql(expression, "from_")
2480        from_ = f" FROM {from_}" if from_ else ""
2481        return f"SHOW {expression.name}{from_}"
2482
    def soundex_sql(self, expression: exp.Soundex) -> str:
        # DuckDB has no SOUNDEX implementation; warn and emit the call verbatim.
        self.unsupported("SOUNDEX is not supported in DuckDB")
        return self.func("SOUNDEX", expression.this)
2486
    def sortarray_sql(self, expression: exp.SortArray) -> str:
        """Render SORT_ARRAY via DuckDB's LIST_SORT / ARRAY_REVERSE_SORT,
        picking the most compact spelling when the flags are literal booleans."""
        arr = expression.this
        asc = expression.args.get("asc")
        nulls_first = expression.args.get("nulls_first")

        # Neither flag is a literal boolean: pass both through untouched.
        if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first)

        nulls_are_first = nulls_first == exp.true()
        nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

        # Dynamic direction with a literal NULLS placement.
        if not isinstance(asc, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first_sql)

        descending = asc == exp.false()

        # Both flags literal: default ascending sort needs no arguments,
        # descending with default NULLS placement maps to ARRAY_REVERSE_SORT.
        if not descending and not nulls_are_first:
            return self.func("LIST_SORT", arr)
        if not nulls_are_first:
            return self.func("ARRAY_REVERSE_SORT", arr)
        return self.func(
            "LIST_SORT",
            arr,
            exp.Literal.string("DESC" if descending else "ASC"),
            exp.Literal.string("NULLS FIRST"),
        )
2513
2514    def install_sql(self, expression: exp.Install) -> str:
2515        force = "FORCE " if expression.args.get("force") else ""
2516        this = self.sql(expression, "this")
2517        from_clause = expression.args.get("from_")
2518        from_clause = f" FROM {from_clause}" if from_clause else ""
2519        return f"{force}INSTALL {this}{from_clause}"
2520
2521    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
2522        self.unsupported(
2523            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
2524        )
2525        return self.function_fallback_sql(expression)
2526
2527    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
2528        return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))
2529
    def strposition_sql(self, expression: exp.StrPosition) -> str:
        """Render STR_POSITION with BLOB support and optional position clamping.

        Delegates to the module-level strposition_sql helper for the plain
        VARCHAR case.
        """
        this = expression.this
        substr = expression.args.get("substr")
        position = expression.args.get("position")

        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types
        # Convert to HEX strings, use STRPOS, then convert hex position to byte position
        if _is_binary(this):
            # Build expression: STRPOS(HEX(haystack), HEX(needle))
            hex_strpos = exp.StrPosition(
                this=exp.Hex(this=this),
                substr=exp.Hex(this=substr),
            )

            # Each byte is two hex characters: byte_pos = (hex_pos + 1) / 2
            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

        # For VARCHAR: handle clamp_position
        if expression.args.get("clamp_position") and position:
            # Non-positive start positions are clamped to 1
            expression = expression.copy()
            expression.set(
                "position",
                exp.If(
                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                    true=exp.Literal.number(1),
                    false=position.copy(),
                ),
            )

        return strposition_sql(self, expression)
2559
2560    def substring_sql(self, expression: exp.Substring) -> str:
2561        if expression.args.get("zero_start"):
2562            start = expression.args.get("start")
2563            length = expression.args.get("length")
2564
2565            if start := expression.args.get("start"):
2566                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
2567            if length := expression.args.get("length"):
2568                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)
2569
2570            return self.func("SUBSTRING", expression.this, start, length)
2571
2572        return self.function_fallback_sql(expression)
2573
    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        """Render STR_TO_TIME via STRPTIME/TRY_STRPTIME, adding a TIMESTAMPTZ
        cast for timezone-aware target types."""
        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DType.TIMESTAMPLTZ,
            exp.DType.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            # Safe mode: TRY_STRPTIME yields NULL instead of raising on bad input
            formatted_time = self.format_time(expression)
            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            # NOTE(review): base_sql is already-rendered SQL text; exp.cast
            # appears to re-parse it to wrap the TIMESTAMPTZ cast — confirm intended.
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
                )
            )
        return base_sql
2598
2599    def strtodate_sql(self, expression: exp.StrToDate) -> str:
2600        formatted_time = self.format_time(expression)
2601        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
2602        return self.sql(
2603            exp.cast(
2604                self.func(function_name, expression.this, formatted_time),
2605                exp.DataType(this=exp.DType.DATE),
2606            )
2607        )
2608
2609    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
2610        this = expression.this
2611        time_format = self.format_time(expression)
2612        safe = expression.args.get("safe")
2613        time_type = exp.DataType.build("TIME", dialect="duckdb")
2614        cast_expr = exp.TryCast if safe else exp.Cast
2615
2616        if time_format:
2617            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
2618            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
2619            return self.sql(cast_expr(this=strptime, to=time_type))
2620
2621        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
2622            return self.sql(this)
2623
2624        return self.sql(cast_expr(this=this, to=time_type))
2625
2626    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
2627        if not expression.this:
2628            return "CURRENT_DATE"
2629
2630        expr = exp.Cast(
2631            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
2632            to=exp.DataType(this=exp.DType.DATE),
2633        )
2634        return self.sql(expr)
2635
2636    def checkjson_sql(self, expression: exp.CheckJson) -> str:
2637        arg = expression.this
2638        return self.sql(
2639            exp.case()
2640            .when(
2641                exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)),
2642                exp.null(),
2643            )
2644            .else_(exp.Literal.string("Invalid JSON"))
2645        )
2646
2647    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
2648        arg = expression.this
2649        if expression.args.get("safe"):
2650            return self.sql(
2651                exp.case()
2652                .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
2653                .else_(exp.null())
2654            )
2655        return self.func("JSON", arg)
2656
2657    def unicode_sql(self, expression: exp.Unicode) -> str:
2658        if expression.args.get("empty_is_zero"):
2659            return self.sql(
2660                exp.case()
2661                .when(expression.this.eq(exp.Literal.string("")), exp.Literal.number(0))
2662                .else_(exp.Anonymous(this="UNICODE", expressions=[expression.this]))
2663            )
2664
2665        return self.func("UNICODE", expression.this)
2666
2667    def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str:
2668        return self.sql(
2669            exp.case()
2670            .when(exp.func("json_type", expression.this).eq("NULL"), exp.null())
2671            .else_(expression.this)
2672        )
2673
2674    def trunc_sql(self, expression: exp.Trunc) -> str:
2675        decimals = expression.args.get("decimals")
2676        if (
2677            expression.args.get("fractions_supported")
2678            and decimals
2679            and not decimals.is_type(exp.DType.INT)
2680        ):
2681            decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")
2682
2683        return self.func("TRUNC", expression.this, decimals)
2684
    def normal_sql(self, expression: exp.Normal) -> str:
        """
        Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

        Uses the Box-Muller transform via NORMAL_TEMPLATE, which combines two
        uniform random values (u1, u2) with the given mean and stddev.
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expr = expression.args["gen"]

        # Build two uniform random values [0, 1) for Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            # Unseeded RANDOM(): non-deterministic output
            u1: exp.Expr = exp.Rand()
            u2: exp.Expr = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            # seed + 1 produces the second, distinct uniform value
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
2710
    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        gen = expression.args.get("gen")

        # Determine if result should be integer (both bounds are integers).
        # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expr = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            # +1 makes the integer upper bound inclusive after FLOOR below
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expr = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        if is_int_result:
            # FLOOR + BIGINT cast yields an integer result
            result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

        return self.sql(result)
2756
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """Transpile TIME_FROM_PARTS, emulating Snowflake's overflow handling.

        When `overflow` is set, out-of-range components are rendered as
        '00:00:00'::TIME + INTERVAL <total seconds> SECOND; otherwise the
        components map directly to DuckDB's MAKE_TIME.
        """
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    # Literal couldn't be converted to a Python value; fall through
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                # Fold nanoseconds into the seconds component
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME
        if nano:
            # Fold nanoseconds into the seconds component
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
2800
    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies server timezone while DuckDB uses local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        # Specifiers emulated via STRFTIME format codes
        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
            # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.build(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.build(cast_type, dialect="duckdb"),
                )
            )

        # EPOCH_* specifiers map to dedicated DuckDB epoch functions
        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns float, cast to BIGINT for integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.build("BIGINT", dialect="duckdb"))
            return self.sql(result)

        return super().extract_sql(expression)
2865
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """Transpile TIMESTAMP_FROM_PARTS to DuckDB.

        Supports both the (date_expr, time_expr) form — rendered as DATE + TIME —
        and the component form, rendered via MAKE_TIMESTAMP with milli/nano
        folded into the seconds argument.
        """
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        milli = expression.args.get("milli")
        if milli is not None:
            # Fold milliseconds into the seconds component
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            # Fold nanoseconds into the seconds component
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
2893
2894    @unsupported_args("nano")
2895    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
2896        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2897        if nano := expression.args.get("nano"):
2898            nano.pop()
2899
2900        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2901        return f"CAST({timestamp} AS TIMESTAMPTZ)"
2902
2903    @unsupported_args("nano")
2904    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
2905        # Extract zone before popping
2906        zone = expression.args.get("zone")
2907        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2908        if zone:
2909            zone = zone.pop()
2910
2911        if nano := expression.args.get("nano"):
2912            nano.pop()
2913
2914        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2915
2916        if zone:
2917            # Use AT TIME ZONE to apply the explicit timezone
2918            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"
2919
2920        return timestamp
2921
    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: str | None = None,
    ) -> str:
        """Render TABLESAMPLE, forcing reservoir sampling for row-count samples."""
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            # Discrete row-count samples require reservoir sampling; rewrite
            # any other requested method and warn.
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
2941
    def join_sql(self, expression: exp.Join) -> str:
        """Render JOIN, adapting condition-less joins for DuckDB."""
        if (
            not expression.args.get("using")
            and not expression.args.get("on")
            and not expression.method
            and (expression.kind in ("", "INNER", "OUTER"))
        ):
            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            if isinstance(expression.this, exp.Unnest):
                return super().join_sql(expression.on(exp.true()))

            # No condition and no method: drop side/kind from the rendered join
            expression.set("side", None)
            expression.set("kind", None)

        return super().join_sql(expression)
2958
2959    def countif_sql(self, expression: exp.CountIf) -> str:
2960        if self.dialect.version >= (1, 2):
2961            return self.function_fallback_sql(expression)
2962
2963        # https://github.com/tobymao/sqlglot/pull/4749
2964        return count_if_to_sum(self, expression)
2965
    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Generate bracket (subscript) syntax, emulating pre-1.2 DuckDB behavior.

        For DuckDB >= 1.2 the default generator applies. For older targets,
        array literals are parenthesized, and MAP subscripts are wrapped with
        `(...)[1]` so a single element (not a list) is returned.
        """
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                # Lazily annotate types so MAP subscripts can be told apart
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                # Pre-1.2 map[key] returned a LIST; take its first element
                bracket = f"({bracket})[1]"

        return bracket
2987
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """Generate SQL for `<func> WITHIN GROUP (ORDER BY ...)`.

        DuckDB has no WITHIN GROUP clause, so the ordering is folded into the
        function call itself (ARRAY_AGG and the ordered-set percentile family).
        """
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the trailing ")" so the ordering clause can be spliced inside the call
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
3029
    def length_sql(self, expression: exp.Length) -> str:
        """Generate LENGTH, handling arguments that may be binary at runtime.

        When the argument cannot be proven textual, a CASE over TYPEOF picks
        between a byte length (BLOB) and a character length (VARCHAR).
        """
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            # Lazy import to avoid a module-level dependency cycle with the optimizer
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)
3056
3057    def bitlength_sql(self, expression: exp.BitLength) -> str:
3058        if not _is_binary(arg := expression.this):
3059            return self.func("BIT_LENGTH", arg)
3060
3061        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
3062        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))
3063
3064    def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str:
3065        arg = expression.expressions[0]
3066        if arg.is_type(*exp.DataType.REAL_TYPES):
3067            arg = exp.cast(arg, exp.DType.INT)
3068        return self.func("CHR", arg)
3069
    def collation_sql(self, expression: exp.Collation) -> str:
        # DuckDB has no COLLATION() function; warn but still emit the call as-is
        self.unsupported("COLLATION function is not supported by DuckDB")
        return self.function_fallback_sql(expression)
3073
    def collate_sql(self, expression: exp.Collate) -> str:
        """Translate a dash-separated (Snowflake-style) collation into DuckDB's dotted form.

        Specifier parts are lowercased; default specifiers are dropped,
        unsupported ones are flagged (but kept), and the remainder is joined
        with '.' for DuckDB. An empty result drops the COLLATE clause entirely.
        """
        if not expression.expression.is_string:
            return super().collate_sql(expression)

        raw = expression.expression.name
        if not raw:
            # Empty collation string: emit just the collated expression
            return self.sql(expression.this)

        parts = []
        for part in raw.split("-"):
            lower = part.lower()
            if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
                if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
                    self.unsupported(
                        f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
                    )
                parts.append(lower)

        if not parts:
            # Only default specifiers were present; no COLLATE clause needed
            return self.sql(expression.this)
        return super().collate_sql(
            exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
        )
3097
3098    def _validate_regexp_flags(self, flags: exp.Expr | None, supported_flags: str) -> str | None:
3099        """
3100        Validate and filter regexp flags for DuckDB compatibility.
3101
3102        Args:
3103            flags: The flags expression to validate
3104            supported_flags: String of supported flags (e.g., "ims", "cims").
3105                            Only these flags will be returned.
3106
3107        Returns:
3108            Validated/filtered flag string, or None if no valid flags remain
3109        """
3110        if not isinstance(flags, exp.Expr):
3111            return None
3112
3113        if not flags.is_string:
3114            self.unsupported("Non-literal regexp flags are not fully supported in DuckDB")
3115            return None
3116
3117        flag_str = flags.this
3118        unsupported = set(flag_str) - set(supported_flags)
3119
3120        if unsupported:
3121            self.unsupported(
3122                f"Regexp flags {sorted(unsupported)} are not supported in this context"
3123            )
3124
3125        flag_str = "".join(f for f in flag_str if f in supported_flags)
3126        return flag_str if flag_str else None
3127
    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
        """Emulate REGEXP_COUNT as LENGTH(REGEXP_EXTRACT_ALL(subject, pattern)).

        The optional `position` is applied via SUBSTRING on the subject, flags
        are embedded inline as `(?ims)` (REGEXP_EXTRACT_ALL has no flags arg),
        and an empty pattern yields 0 to match Snowflake.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        parameters = expression.args.get("parameters")

        # Validate flags - only "ims" flags are supported for embedded patterns
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")

        if position:
            # Start matching at `position` by counting over a suffix of the input
            this = exp.Substring(this=this, start=position)

        # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument)
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character
        result = (
            exp.case()
            .when(
                exp.EQ(this=pattern, expression=exp.Literal.string("")),
                exp.Literal.number(0),
            )
            .else_(
                exp.Length(
                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
                )
            )
        )

        return self.sql(result)
3159
    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
        """Generate REGEXP_REPLACE, mapping source-dialect extras onto DuckDB.

        Handles a literal `occurrence` (0 = replace all, expressed via the 'g'
        flag) and a literal `position` (emulated by splitting the subject and
        re-prepending the untouched prefix). Non-literal occurrence/position
        values are flagged as unsupported.
        """
        subject = expression.this
        pattern = expression.expression
        replacement = expression.args.get("replacement") or exp.Literal.string("")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        modifiers = expression.args.get("modifiers")

        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""

        # Handle occurrence (only literals supported)
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                # DuckDB can only replace the first match or all matches
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # flag duckdb to do either all or none, single_replace check is for duckdb round trip
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            # Replace only within the suffix starting at `pos`; keep the prefix intact
            pos = position.to_py()
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)
3210
3211    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
3212        this = expression.this
3213        pattern = expression.expression
3214        flag = expression.args.get("flag")
3215
3216        if expression.args.get("full_match"):
3217            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
3218            flag = exp.Literal.string(validated_flags) if validated_flags else None
3219            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)
3220
3221        return self.func("REGEXP_MATCHES", this, pattern, flag)
3222
3223    @unsupported_args("ins_cost", "del_cost", "sub_cost")
3224    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
3225        this = expression.this
3226        expr = expression.expression
3227        max_dist = expression.args.get("max_dist")
3228
3229        if max_dist is None:
3230            return self.func("LEVENSHTEIN", this, expr)
3231
3232        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
3233        levenshtein = exp.Levenshtein(this=this, expression=expr)
3234        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
3235
    def pad_sql(self, expression: exp.Pad) -> str:
        """
        Handle RPAD/LPAD for VARCHAR and BINARY types.

        For VARCHAR: Delegate to parent class
        For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))
        """
        string_arg = expression.this
        # Default fill is a single space, matching common LPAD/RPAD semantics
        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")

        if _is_binary(string_arg) or _is_binary(fill_arg):
            length_arg = expression.expression
            is_left = expression.args.get("is_left")

            # GREATEST clamps at 0 when the input is already long enough
            input_len = exp.ByteLength(this=string_arg)
            chars_needed = length_arg - input_len
            pad_count = exp.Greatest(
                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
            )
            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)

            # LPAD prepends the padding; RPAD appends it
            left, right = string_arg, repeat_expr
            if is_left:
                left, right = right, left

            result = exp.DPipe(this=left, expression=right)
            return self.sql(result)

        # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
        return super().pad_sql(expression)
3266
3267    def minhash_sql(self, expression: exp.Minhash) -> str:
3268        k = expression.this
3269        exprs = expression.expressions
3270
3271        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
3272            self.unsupported(
3273                "MINHASH with multiple expressions or * requires manual query restructuring"
3274            )
3275            return self.func("MINHASH", k, *exprs)
3276
3277        expr = exprs[0]
3278        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
3279        return f"({self.sql(result)})"
3280
3281    def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
3282        expr = expression.this
3283        result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
3284        return f"({self.sql(result)})"
3285
3286    def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
3287        expr = expression.this
3288        result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr)
3289        return f"({self.sql(result)})"
3290
3291    def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str:
3292        return self.sql(
3293            exp.Filter(
3294                this=exp.func("LIST", exp.Distinct(expressions=[expression.this])),
3295                expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()),
3296            )
3297        )
3298
    def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str:
        # No DuckDB equivalent; warn and emit the call unchanged
        self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB")
        return self.function_fallback_sql(expression)
3302
    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
        """LIST_DISTINCT, optionally preserving a single NULL element.

        When `check_null` is set, a NULL is re-appended iff the original array
        contained one (detected by comparing ARRAY_SIZE with LIST_COUNT).
        """
        arr = expression.this
        func = self.func("LIST_DISTINCT", arr)

        if expression.args.get("check_null"):
            # Distinct over the NULL-free (compacted) array, plus a single NULL
            add_null_to_array = exp.func(
                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
            )
            return self.sql(
                exp.If(
                    this=exp.NEQ(
                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
                    ),
                    true=add_null_to_array,
                    false=func,
                )
            )

        return func
3322
3323    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
3324        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
3325            return self._array_bag_sql(
3326                self.ARRAY_INTERSECTION_CONDITION,
3327                expression.expressions[0],
3328                expression.expressions[1],
3329            )
3330        return self.function_fallback_sql(expression)
3331
3332    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
3333        arr1, arr2 = expression.this, expression.expression
3334        if expression.args.get("is_multiset"):
3335            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
3336        return self.sql(
3337            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
3338        )
3339
    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
        """
        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
        expressions that adjust the index at query time:
          - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
          - end:   CASE WHEN end < 0 THEN end - 1 ELSE end END
        """
        start, end = expression.args.get("start"), expression.args.get("end")

        if expression.args.get("zero_based"):
            if start is not None:
                # Non-negative starts shift +1 to become 1-based; negative starts already align
                start = (
                    exp.case()
                    .when(
                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(start)
                )
            if end is not None:
                # Negative ends shift -1 so the exclusive bound becomes inclusive
                end = (
                    exp.case()
                    .when(
                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(end)
                )

        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))
3371
    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
        """Emulate ARRAYS_ZIP by transforming an index range into structs.

        Builds the pieces the ARRAYS_ZIP_TEMPLATE needs: a NULL-propagation
        check, an all-empty check, an empty struct of matching shape, the
        maximum input length, and the per-index struct constructor.
        """
        args = expression.expressions

        if not args:
            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))

        # Build placeholder values for template
        lengths = [exp.Length(this=arg) for arg in args]
        max_len = (
            lengths[0]
            if len(lengths) == 1
            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
        )

        # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
        empty_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
                for i in range(len(args))
            ],
        )

        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
        # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
        index = exp.column("__i") + 1
        transform_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(
                    this=exp.Literal.string(f"${i + 1}"),
                    expression=exp.func("COALESCE", arg, exp.array())[index],
                )
                for i, arg in enumerate(args)
            ],
        )

        result = exp.replace_placeholders(
            self.ARRAYS_ZIP_TEMPLATE.copy(),
            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
            all_empty_check=exp.and_(
                *[
                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
                    for arg in args
                ]
            ),
            empty_struct=empty_struct,
            max_len=max_len,
            transform_struct=transform_struct,
        )
        return self.sql(result)
3424
3425    def lower_sql(self, expression: exp.Lower) -> str:
3426        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
3427        return _gen_with_cast_to_blob(self, expression, result_sql)
3428
3429    def upper_sql(self, expression: exp.Upper) -> str:
3430        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
3431        return _gen_with_cast_to_blob(self, expression, result_sql)
3432
3433    def reverse_sql(self, expression: exp.Reverse) -> str:
3434        result_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
3435        return _gen_with_cast_to_blob(self, expression, result_sql)
3436
    def _left_right_sql(self, expression: exp.Left | exp.Right, func_name: str) -> str:
        """Shared LEFT/RIGHT generation, with BLOB emulation via HEX/UNHEX.

        When `negative_length_returns_empty` is set, a negative length yields
        an empty string (or empty BLOB) via a CASE wrapper.
        """
        arg = expression.this
        length = expression.expression
        is_binary = _is_binary(arg)

        if is_binary:
            # LEFT/RIGHT(blob, n) becomes UNHEX(LEFT/RIGHT(HEX(blob), n * 2))
            # Each byte becomes 2 hex chars, so multiply length by 2
            hex_arg = exp.Hex(this=arg)
            hex_length = exp.Mul(this=length, expression=exp.Literal.number(2))
            result: exp.Expression = exp.Unhex(
                this=exp.Anonymous(this=func_name, expressions=[hex_arg, hex_length])
            )
        else:
            result = exp.Anonymous(this=func_name, expressions=[arg, length])

        if expression.args.get("negative_length_returns_empty"):
            # Some dialects return '' for negative lengths instead of erroring
            empty: exp.Expression = exp.Literal.string("")
            if is_binary:
                empty = exp.Unhex(this=empty)
            result = exp.case().when(length < exp.Literal.number(0), empty).else_(result)

        return self.sql(result)
3460
3461    def left_sql(self, expression: exp.Left) -> str:
3462        return self._left_right_sql(expression, "LEFT")
3463
3464    def right_sql(self, expression: exp.Right) -> str:
3465        return self._left_right_sql(expression, "RIGHT")
3466
3467    def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str:
3468        return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING"))
3469
    def stuff_sql(self, expression: exp.Stuff) -> str:
        """Emulate STUFF/INSERT as substring concatenation.

        Result is left-part || insertion || right-part, where the parts are
        1-based substrings around the replaced range. BLOB inputs are handled
        on their HEX representation (2 chars per byte) and UNHEXed back.
        """
        base = expression.this
        start = expression.args["start"]
        length = expression.args["length"]
        insertion = expression.expression
        is_binary = _is_binary(base)

        if is_binary:
            # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
            # (each byte = 2 hex chars), then UNHEX back to BLOB
            base = exp.Hex(this=base)
            insertion = exp.Hex(this=insertion)
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
            )
            right = exp.Substring(
                this=base.copy(),
                start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
                + exp.Literal.number(1),
            )
        else:
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=start.copy() - exp.Literal.number(1),
            )
            right = exp.Substring(this=base.copy(), start=start + length)
        result: exp.Expr = exp.DPipe(
            this=exp.DPipe(this=left, expression=insertion), expression=right
        )

        if is_binary:
            result = exp.Unhex(this=result)

        return self.sql(result)
3507
3508    def rand_sql(self, expression: exp.Rand) -> str:
3509        seed = expression.this
3510        if seed is not None:
3511            self.unsupported("RANDOM with seed is not supported in DuckDB")
3512
3513        lower = expression.args.get("lower")
3514        upper = expression.args.get("upper")
3515
3516        if lower and upper:
3517            # scale DuckDB's [0,1) to the specified range
3518            range_size = exp.paren(upper - lower)
3519            scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)
3520
3521            # For now we assume that if bounds are set, return type is BIGINT. Snowflake/Teradata
3522            result = exp.cast(scaled, exp.DType.BIGINT)
3523            return self.sql(result)
3524
3525        # Default DuckDB behavior - just return RANDOM() as float
3526        return "RANDOM()"
3527
3528    def bytelength_sql(self, expression: exp.ByteLength) -> str:
3529        arg = expression.this
3530
3531        # Check if it's a text type (handles both literals and annotated expressions)
3532        if arg.is_type(*exp.DataType.TEXT_TYPES):
3533            return self.func("OCTET_LENGTH", exp.Encode(this=arg))
3534
3535        # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated)
3536        return self.func("OCTET_LENGTH", arg)
3537
    def base64encode_sql(self, expression: exp.Base64Encode) -> str:
        """BASE64_ENCODE -> TO_BASE64, emulating custom alphabets and line wrapping."""
        # DuckDB TO_BASE64 requires BLOB input
        # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
        # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
        result = expression.this

        # Check if input is a string type - ENCODE only accepts VARCHAR
        if result.is_type(*exp.DataType.TEXT_TYPES):
            result = exp.Encode(this=result)

        result = exp.ToBase64(this=result)

        max_line_length = expression.args.get("max_line_length")
        alphabet = expression.args.get("alphabet")

        # Handle custom alphabet by replacing standard chars with custom ones
        result = _apply_base64_alphabet_replacements(result, alphabet)

        # Handle max_line_length by inserting newlines every N characters
        # (only literal integer lengths can be applied; otherwise treated as 0 = no wrapping)
        line_length = (
            t.cast(int, max_line_length.to_py())
            if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
            else 0
        )
        if line_length > 0:
            # Insert a newline after every `line_length` chars, then trim the trailing one
            newline = exp.Chr(expressions=[exp.Literal.number(10)])
            result = exp.Trim(
                this=exp.RegexpReplace(
                    this=result,
                    expression=exp.Literal.string(f"(.{{{line_length}}})"),
                    replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
                ),
                expression=newline,
                position="TRAILING",
            )

        return self.sql(result)
3575
3576    def replace_sql(self, expression: exp.Replace) -> str:
3577        result_sql = self.func(
3578            "REPLACE",
3579            _cast_to_varchar(expression.this),
3580            _cast_to_varchar(expression.expression),
3581            _cast_to_varchar(expression.args.get("replacement")),
3582        )
3583        return _gen_with_cast_to_blob(self, expression, result_sql)
3584
    def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
        # Shared path for binary bitwise operators: prepare the operands,
        # emit the operator, then restore BLOB typing when applicable
        _prepare_binary_bitwise_args(expression)
        result_sql = self.binary(expression, op)
        return _gen_with_cast_to_blob(self, expression, result_sql)
3589
    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
        # DuckDB spells bitwise XOR as the XOR() function rather than an operator
        _prepare_binary_bitwise_args(expression)
        result_sql = self.func("XOR", expression.this, expression.expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)
3594
    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        """OBJECT_INSERT -> STRUCT_INSERT, or STRUCT_PACK when the base struct is empty."""
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expr) else ""
        value_sql = self.sql(expression, "value")

        # DuckDB struct functions take `key := value` keyword-style arguments
        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)
3609
3610    def mapcat_sql(self, expression: exp.MapCat) -> str:
3611        result = exp.replace_placeholders(
3612            self.MAPCAT_TEMPLATE.copy(),
3613            map1=expression.this,
3614            map2=expression.expression,
3615        )
3616        return self.sql(result)
3617
    def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str:
        # Rewritten as ARRAY_CONTAINS(MAP_KEYS(<map>), <key>).
        # NOTE(review): here `this` is passed as the searched key and
        # args["key"] as the map argument — presumably mirroring the source
        # dialect's MAP_CONTAINS_KEY(key, map) argument order; verify against
        # the exp.MapContainsKey parser if this looks swapped.
        return self.func(
            "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this
        )
3622
3623    def mapdelete_sql(self, expression: exp.MapDelete) -> str:
3624        map_arg = expression.this
3625        keys_to_delete = expression.expressions
3626
3627        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3628
3629        lambda_expr = exp.Lambda(
3630            this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
3631            expressions=[exp.to_identifier("x")],
3632        )
3633        result = exp.func(
3634            "MAP_FROM_ENTRIES",
3635            exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
3636        )
3637        return self.sql(result)
3638
3639    def mappick_sql(self, expression: exp.MapPick) -> str:
3640        map_arg = expression.this
3641        keys_to_pick = expression.expressions
3642
3643        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3644
3645        if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
3646            lambda_expr = exp.Lambda(
3647                this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
3648                expressions=[exp.to_identifier("x")],
3649            )
3650        else:
3651            lambda_expr = exp.Lambda(
3652                this=exp.In(this=x_dot_key, expressions=keys_to_pick),
3653                expressions=[exp.to_identifier("x")],
3654            )
3655
3656        result = exp.func(
3657            "MAP_FROM_ENTRIES",
3658            exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
3659        )
3660        return self.sql(result)
3661
    def mapsize_sql(self, expression: exp.MapSize) -> str:
        # MAP_SIZE(m) -> CARDINALITY(m)
        return self.func("CARDINALITY", expression.this)
3664
    @unsupported_args("update_flag")
    def mapinsert_sql(self, expression: exp.MapInsert) -> str:
        """MAP_INSERT -> MAP_CONCAT(map, MAP {key: value}).

        The inserted value is cast to the map's declared value type (when the
        type is known) so MAP_CONCAT doesn't fail on mismatched entry types.
        """
        map_arg = expression.this
        key = expression.args.get("key")
        value = expression.args.get("value")

        map_type = map_arg.type

        if value is not None:
            if map_type and map_type.expressions and len(map_type.expressions) > 1:
                # Extract the value type from MAP(key_type, value_type)
                value_type = map_type.expressions[1]
                # Cast value to match the map's value type to avoid type conflicts
                value = exp.cast(value, value_type)
            # else: polymorphic MAP case - no type parameters available, use value as-is

        # Create a single-entry map for the new key-value pair
        new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
        new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)

        # Use MAP_CONCAT to merge the original map with the new entry
        # This automatically handles both insert and update cases
        result = exp.func("MAP_CONCAT", map_arg, new_entry)

        return self.sql(result)
3690
3691    def startswith_sql(self, expression: exp.StartsWith) -> str:
3692        return self.func(
3693            "STARTS_WITH",
3694            _cast_to_varchar(expression.this),
3695            _cast_to_varchar(expression.expression),
3696        )
3697
3698    def space_sql(self, expression: exp.Space) -> str:
3699        # DuckDB's REPEAT requires BIGINT for the count parameter
3700        return self.sql(
3701            exp.Repeat(
3702                this=exp.Literal.string(" "),
3703                times=exp.cast(expression.this, exp.DType.BIGINT),
3704            )
3705        )
3706
3707    def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
3708        # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
3709        if isinstance(expression.this, exp.Generator):
3710            # Preserve alias, joins, and other table-level args
3711            table = exp.Table(
3712                this=expression.this,
3713                alias=expression.args.get("alias"),
3714                joins=expression.args.get("joins"),
3715            )
3716            return self.sql(table)
3717
3718        return super().tablefromrows_sql(expression)
3719
    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST, with special handling for BigQuery-style array explosion.

        NOTE: mutates `expression` in place (appends a kwarg, may clear the alias)
        before delegating to the parent generator.
        """
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    # Keep only the first column name as the subquery's table alias
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
3741
    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        """Render IGNORE NULLS where DuckDB supports it, or drop it with a warning."""
        this = expression.this

        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        if isinstance(this, exp.First):
            # FIRST(x IGNORE NULLS) maps onto ANY_VALUE, which skips NULLs
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        # The IGNORE NULLS wrapper itself is dropped; only the inner function renders
        return self.sql(this)
3757
    def split_sql(self, expression: exp.Split) -> str:
        """Render SPLIT as STR_SPLIT, optionally wrapped in a CASE that emulates
        source-dialect edge cases (NULL delimiter, empty-string delimiter)."""
        base_func = exp.func("STR_SPLIT", expression.this, expression.expression)

        case_expr = exp.case().else_(base_func)
        needs_case = False

        if expression.args.get("null_returns_null"):
            # Source dialect returns NULL when the delimiter is NULL
            case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
            needs_case = True

        if expression.args.get("empty_delimiter_returns_whole"):
            # When delimiter is empty string, return input string as single array element
            array_with_input = exp.array(expression.this)
            case_expr = case_expr.when(
                expression.expression.eq(exp.Literal.string("")), array_with_input
            )
            needs_case = True

        return self.sql(case_expr if needs_case else base_func)
3777
3778    def splitpart_sql(self, expression: exp.SplitPart) -> str:
3779        string_arg = expression.this
3780        delimiter_arg = expression.args.get("delimiter")
3781        part_index_arg = expression.args.get("part_index")
3782
3783        if delimiter_arg and part_index_arg:
3784            # Handle Snowflake's "index 0 and 1 both return first element" behavior
3785            if expression.args.get("part_index_zero_as_one"):
3786                # Convert 0 to 1 for compatibility
3787
3788                part_index_arg = exp.Paren(
3789                    this=exp.case()
3790                    .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
3791                    .else_(part_index_arg)
3792                )
3793
3794            # Use Anonymous to avoid recursion
3795            base_func_expr: exp.Expr = exp.Anonymous(
3796                this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
3797            )
3798            needs_case_transform = False
3799            case_expr = exp.case().else_(base_func_expr)
3800
3801            if expression.args.get("empty_delimiter_returns_whole"):
3802                # When delimiter is empty string:
3803                # - Return whole string if part_index is 1 or -1
3804                # - Return empty string otherwise
3805                empty_case = exp.Paren(
3806                    this=exp.case()
3807                    .when(
3808                        exp.or_(
3809                            part_index_arg.eq(exp.Literal.number("1")),
3810                            part_index_arg.eq(exp.Literal.number("-1")),
3811                        ),
3812                        string_arg,
3813                    )
3814                    .else_(exp.Literal.string(""))
3815                )
3816
3817                case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
3818                needs_case_transform = True
3819
3820            """
3821            Output looks something like this:
3822
3823            CASE
3824            WHEN delimiter is '' THEN
3825                (
3826                    CASE
3827                    WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
3828                    ELSE '' END
3829                )
3830            ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
3831            END
3832
3833            """
3834            return self.sql(case_expr if needs_case_transform else base_func_expr)
3835
3836        return self.function_fallback_sql(expression)
3837
3838    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
3839        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
3840            # DuckDB should render RESPECT NULLS only for the general-purpose
3841            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
3842            return super().respectnulls_sql(expression)
3843
3844        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
3845        return self.sql(expression, "this")
3846
    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        """Render ARRAY_TO_STRING, emulating NULL-element / NULL-delimiter
        semantics of the source dialect via LIST_TRANSFORM and CASE."""
        null = expression.args.get("null")

        if expression.args.get("null_is_empty"):
            # NULL elements become '' before joining (e.g. Snowflake semantics)
            x = exp.to_identifier("x")
            list_transform = exp.Transform(
                this=expression.this.copy(),
                expression=exp.Lambda(
                    this=exp.Coalesce(
                        this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                    ),
                    expressions=[x],
                ),
            )
            array_to_string = exp.ArrayToString(
                this=list_transform, expression=expression.expression
            )
            if expression.args.get("null_delim_is_null"):
                # A NULL delimiter makes the whole result NULL
                return self.sql(
                    exp.case()
                    .when(expression.expression.copy().is_(exp.null()), exp.null())
                    .else_(array_to_string)
                )
            return self.sql(array_to_string)

        if null:
            # Explicit NULL replacement string: COALESCE each element first
            x = exp.to_identifier("x")
            return self.sql(
                exp.ArrayToString(
                    this=exp.Transform(
                        this=expression.this,
                        expression=exp.Lambda(
                            this=exp.Coalesce(this=x, expressions=[null]),
                            expressions=[x],
                        ),
                    ),
                    expression=expression.expression,
                )
            )

        return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
3888
    def concatws_sql(self, expression: exp.ConcatWs) -> str:
        """Render CONCAT_WS; binary operands are folded with the || operator
        because DuckDB's CONCAT_WS does not handle BLOBs."""
        # DuckDB-specific: handle binary types using DPipe (||) operator
        separator = seq_get(expression.expressions, 0)
        args = expression.expressions[1:]

        if any(_is_binary(arg) for arg in [separator, *args]):
            # Fold: a || sep || b || sep || c ...
            result = args[0]
            for arg in args[1:]:
                result = exp.DPipe(
                    this=exp.DPipe(this=result, expression=separator), expression=arg
                )
            return self.sql(result)

        return super().concatws_sql(expression)
3903
    def _regexp_extract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
        """Shared renderer for REGEXP_EXTRACT / REGEXP_EXTRACT_ALL.

        Maps source-dialect position/occurrence/flag arguments onto DuckDB's
        REGEXP_EXTRACT(_ALL) plus SUBSTRING/ARRAY_EXTRACT/slicing as needed.
        """
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # Handle Snowflake's 'e' flag: it enables capture group extraction
        # In DuckDB, this is controlled by the group parameter directly
        if params and params.is_string and "e" in params.name:
            params = exp.Literal.string(params.name.replace("e", ""))

        validated_flags = self._validate_regexp_flags(params, supported_flags="cims")

        # Strip default group when no following params (DuckDB default is same as group=0)
        if (
            not validated_flags
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        flags_expr = exp.Literal.string(validated_flags) if validated_flags else None

        # use substring to handle position argument
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            if null_if_pos_overflow:
                # A position past the end yields '' from SUBSTRING; NULLIF maps that to NULL
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        is_extract_all = isinstance(expression, exp.RegexpExtractAll)
        non_single_occurrence = occurrence and (not occurrence.is_int or occurrence.to_py() > 1)

        # Any occurrence beyond the first requires extracting all matches first
        if is_extract_all or non_single_occurrence:
            name = "REGEXP_EXTRACT_ALL"
        else:
            name = "REGEXP_EXTRACT"

        result: exp.Expr = exp.Anonymous(
            this=name, expressions=[this, expression.expression, group, flags_expr]
        )

        # Array slicing for REGEXP_EXTRACT_ALL with occurrence
        if is_extract_all and non_single_occurrence:
            result = exp.Bracket(this=result, expressions=[exp.Slice(this=occurrence)])
        # ARRAY_EXTRACT for REGEXP_EXTRACT with occurrence > 1
        elif non_single_occurrence:
            result = exp.Anonymous(this="ARRAY_EXTRACT", expressions=[result, occurrence])

        return self.sql(result)
3956
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Shared implementation with REGEXP_EXTRACT_ALL; see _regexp_extract_sql.
        return self._regexp_extract_sql(expression)
3959
    def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str:
        # Shared implementation with REGEXP_EXTRACT; see _regexp_extract_sql.
        return self._regexp_extract_sql(expression)
3962
    def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
        """Emulate REGEXP_INSTR (match position) in DuckDB.

        DuckDB has no REGEXP_INSTR, so the position of the n-th match is
        reconstructed arithmetically: 1 + total length of the text before the
        match (the regex-split pieces plus the earlier matches), adjusted for
        a non-default starting position. ``option=1`` returns the position
        just past the end of the match instead of its start.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        orig_occ = expression.args.get("occurrence")
        occurrence = orig_occ or exp.Literal.number(1)
        option = expression.args.get("option")
        parameters = expression.args.get("parameters")

        # Inline supported flags into the pattern as an (?ims) prefix
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle starting position offset
        pos_offset: exp.Expr = exp.Literal.number(0)
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)
            pos_offset = position - exp.Literal.number(1)

        # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
        def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
            lst = exp.Bracket(
                this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
                expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
                offset=1,
            )
            transform = exp.Anonymous(
                this="LIST_TRANSFORM",
                expressions=[
                    lst,
                    exp.Lambda(
                        this=exp.Length(this=exp.to_identifier("x")),
                        expressions=[exp.to_identifier("x")],
                    ),
                ],
            )
            # COALESCE to 0 so an empty slice contributes nothing
            return exp.Coalesce(
                this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
                expressions=[exp.Literal.number(0)],
            )

        # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
        base_pos: exp.Expr = (
            exp.Literal.number(1)
            + sum_lengths("STRING_SPLIT_REGEX", occurrence)
            + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
            + pos_offset
        )

        # option=1: add match length for end position
        if option and option.is_int and option.to_py() == 1:
            match_at_occ = exp.Bracket(
                this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
                expressions=[occurrence],
                offset=1,
            )
            base_pos = base_pos + exp.Coalesce(
                this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
            )

        # NULL checks for all provided arguments
        # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
        null_args = [
            expression.this,
            expression.expression,
            position,
            orig_occ,
            option,
            parameters,
        ]
        null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

        matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

        # CASE: NULL args -> NULL, empty pattern -> 0, too few matches -> 0, else position
        return self.sql(
            exp.case()
            .when(exp.or_(*null_checks), exp.Null())
            .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
            .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
            .else_(base_pos)
        )
4044
4045    @unsupported_args("culture")
4046    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
4047        fmt = expression.args.get("format")
4048        if fmt and fmt.is_int:
4049            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)
4050
4051        self.unsupported("Only integer formats are supported by NumberToStr")
4052        return self.function_fallback_sql(expression)
4053
    def autoincrementcolumnconstraint_sql(self, _) -> str:
        # DuckDB has no AUTOINCREMENT; warn and drop the constraint entirely.
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""
4057
4058    def aliases_sql(self, expression: exp.Aliases) -> str:
4059        this = expression.this
4060        if isinstance(this, exp.Posexplode):
4061            return self.posexplode_sql(this)
4062
4063        return super().aliases_sql(expression)
4064
    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        """Transpile Spark's POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST.

        The output aliases come from the surrounding Aliases/Table node when
        present, defaulting to Spark's "pos" and "col".
        """
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                # Remove the table alias so it is not rendered twice
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql
4102
    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        # Local import to avoid a circular dependency with the optimizer
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            # Strings must be cast before date arithmetic
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DType.FLOAT,
                exp.DType.DOUBLE,
                exp.DType.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)
4161
4162    def format_sql(self, expression: exp.Format) -> str:
4163        if expression.name.lower() == "%s" and len(expression.expressions) == 1:
4164            return self.func("FORMAT", "'{}'", expression.expressions[0])
4165
4166        return self.function_fallback_sql(expression)
4167
4168    def hexstring_sql(
4169        self, expression: exp.HexString, binary_function_repr: str | None = None
4170    ) -> str:
4171        # UNHEX('FF') correctly produces blob \xFF in DuckDB
4172        return super().hexstring_sql(expression, binary_function_repr="UNHEX")
4173
    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        """Render DATE_TRUNC, handling dialect-specific week start days and
        optional preservation of the input's temporal type."""
        unit = expression.args.get("unit")
        date = expression.this

        # Dialect-specific week units map to a day-of-week start (helper returns
        # None for non-week units)
        week_start = _week_unit_to_dow(unit)
        unit = unit_to_str(expression)

        if week_start:
            result = self.sql(
                _build_week_trunc_expression(date, week_start, preserve_start_day=True)
            )
        else:
            result = self.func("DATE_TRUNC", unit, date)

        # Cast back to the input type when the source dialect preserves it,
        # except when truncating a DATE by a date-sized unit (already a DATE)
        if (
            expression.args.get("input_type_preserved")
            and date.is_type(*exp.DataType.TEMPORAL_TYPES)
            and not (is_date_unit(unit) and date.is_type(exp.DType.DATE))
        ):
            return self.sql(exp.Cast(this=result, to=date.type))

        return result
4196
    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        """Render TIMESTAMP_TRUNC, handling timezone-aware truncation and
        optional preservation of the input's temporal type."""
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this
        date_unit = is_date_unit(unit)

        if date_unit and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ):
                # DATE_TRUNC doesn't accept TIME: attach a dummy date, truncate,
                # then cast back to the original TIME type
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DType.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            # Cast back to the input type, except when truncating a DATE by a
            # date-sized unit (already a DATE)
            if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
                date_unit and timestamp.is_type(exp.DType.DATE)
            ):
                return self.sql(exp.Cast(this=result, to=timestamp.type))

        return result
4229
    def trim_sql(self, expression: exp.Trim) -> str:
        """Render TRIM, coercing operands to VARCHAR.

        NOTE: mutates the AST in place via .replace(). The final result is
        post-processed by _gen_with_cast_to_blob — presumably casting back to
        BLOB when the input was binary (see that helper).
        """
        expression.this.replace(_cast_to_varchar(expression.this))
        if expression.expression:
            expression.expression.replace(_cast_to_varchar(expression.expression))

        result_sql = super().trim_sql(expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)
4237
    def round_sql(self, expression: exp.Round) -> str:
        """Render ROUND, mapping dialect rounding modes onto ROUND/ROUND_EVEN."""
        this = expression.this
        decimals = expression.args.get("decimals")
        truncate = expression.args.get("truncate")

        # DuckDB requires the scale (decimals) argument to be an INT
        # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
        if decimals is not None and expression.args.get("casts_non_integer_decimals"):
            if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
                decimals = exp.cast(decimals, exp.DType.INT)

        func = "ROUND"
        if truncate:
            # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
            if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
                func = "ROUND_EVEN"
                truncate = None
            # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
            elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
                # DuckDB's default ROUND behavior; drop the mode argument
                truncate = None

        return self.func(func, this, decimals, truncate)
4260
    def strtok_sql(self, expression: exp.Strtok) -> str:
        """Emulate STRTOK (split on a *set* of delimiter characters).

        Builds a character-class regex from the delimiter at runtime, splits
        with REGEXP_SPLIT_TO_ARRAY, filters out empty tokens (STRTOK skips
        them), and indexes the requested part via the class STRTOK_TEMPLATE.
        """
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Escape regex chars and build character class at runtime using REGEXP_REPLACE
            escaped_delimiter = exp.Anonymous(
                this="REGEXP_REPLACE",
                expressions=[
                    delimiter_arg,
                    exp.Literal.string(
                        r"([\[\]^.\-*+?(){}|$\\])"
                    ),  # Escape problematic regex chars
                    exp.Literal.string(
                        r"\\\1"
                    ),  # Replace with escaped version using $1 backreference
                    exp.Literal.string("g"),  # Global flag
                ],
            )
            # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped_delimiter, ']') END
            regex_pattern = (
                exp.case()
                .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string(""))
                .else_(
                    exp.func(
                        "CONCAT",
                        exp.Literal.string("["),
                        escaped_delimiter,
                        exp.Literal.string("]"),
                    )
                )
            )

            # STRTOK skips empty strings, so we need to filter them out
            # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
            split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern)
            x = exp.to_identifier("x")
            is_empty = x.eq(exp.Literal.string(""))
            filtered_array = exp.func(
                "LIST_FILTER",
                split_array,
                exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]),
            )
            base_func = exp.Bracket(
                this=filtered_array,
                expressions=[part_index_arg],
                offset=1,
            )

            # Use template with the built regex pattern
            result = exp.replace_placeholders(
                self.STRTOK_TEMPLATE.copy(),
                string=string_arg,
                delimiter=delimiter_arg,
                part_index=part_index_arg,
                base_func=base_func,
            )

            return self.sql(result)

        return self.function_fallback_sql(expression)
4323
4324    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
4325        result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))
4326
4327        # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type
4328        if expression.is_type(*exp.DataType.REAL_TYPES):
4329            result = f"CAST({result} AS DOUBLE)"
4330
4331        return result
4332
    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
        """
        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
        dividing the input distribution into n equal-sized buckets.

        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
        does not document the specific algorithm used so results may differ. DuckDB does not
        support RESPECT NULLS.
        """
        this = expression.this
        if isinstance(this, exp.Distinct):
            # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
            if len(this.expressions) < 2:
                self.unsupported("APPROX_QUANTILES requires a bucket count argument")
                return self.function_fallback_sql(expression)
            # Detach the bucket count from the DISTINCT node
            num_quantiles_expr = this.expressions[1].pop()
        else:
            num_quantiles_expr = expression.expression

        if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        num_quantiles = t.cast(int, num_quantiles_expr.to_py())
        if num_quantiles <= 0:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        # n+1 evenly spaced quantile points: 0, 1/n, 2/n, ..., 1
        # Decimal avoids float-representation noise in the generated literals
        quantiles = [
            exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
            for i in range(num_quantiles + 1)
        ]

        return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)))
4367
4368    def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
4369        if expression.args.get("scalar_only"):
4370            expression = exp.JSONExtractScalar(
4371                this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
4372            )
4373        return _arrow_json_extract_sql(self, expression)
4374
    def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
        """Render bitwise NOT (~), with BIT/BLOB coercion for binary operands.

        NOTE: mutates `expression` in place (its type annotation and `this`
        argument) before rendering.
        """
        this = expression.this

        if _is_binary(this):
            # Mark the result as BINARY so _gen_with_cast_to_blob applies
            expression.type = exp.DType.BINARY.into_expr()

        arg = _cast_to_bit(this)

        if isinstance(this, exp.Neg):
            # Parenthesize so ~-x doesn't render ambiguously
            arg = exp.Paren(this=arg)

        expression.set("this", arg)

        result_sql = f"~{self.sql(expression, 'this')}"

        return _gen_with_cast_to_blob(self, expression, result_sql)
4391
4392    def window_sql(self, expression: exp.Window) -> str:
4393        this = expression.this
4394        if isinstance(this, exp.Corr) or (
4395            isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr)
4396        ):
4397            return self._corr_sql(expression)
4398
4399        return super().window_sql(expression)
4400
4401    def filter_sql(self, expression: exp.Filter) -> str:
4402        if isinstance(expression.this, exp.Corr):
4403            return self._corr_sql(expression)
4404
4405        return super().filter_sql(expression)
4406
    def _corr_sql(
        self,
        expression: exp.Filter | exp.Window | exp.Corr,
    ) -> str:
        """Render CORR, mapping DuckDB's NaN result for zero variance to NULL
        when the source dialect expects NULL (null_on_zero_variance)."""
        if isinstance(expression, exp.Corr) and not expression.args.get("null_on_zero_variance"):
            # Plain CORR: no NaN handling needed
            return self.func("CORR", expression.this, expression.expression)

        # Helper rewrites the node for NULL-on-NaN handling; returns None when
        # no rewrite applies
        corr_expr = _maybe_corr_null_to_false(expression)
        if corr_expr is None:
            if isinstance(expression, exp.Window):
                return super().window_sql(expression)
            if isinstance(expression, exp.Filter):
                return super().filter_sql(expression)
            corr_expr = expression  # make mypy happy

        # CASE WHEN ISNAN(corr) THEN NULL ELSE corr END
        return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
TIMEZONE_PATTERN = re.compile(':\\d{2}.*?[+\\-]\\d{2}(?::\\d{2})?')
REGEX_ESCAPE_REPLACEMENTS = {'\\': '\\\\', '-': '\\-', '^': '\\^', '[': '\\[', ']': '\\]'}
RANDSTR_CHAR_POOL = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
RANDSTR_SEED = 123456
WS_CONTROL_CHARS_TO_DUCK = {'\x0b': 11, '\x1c': 28, '\x1d': 29, '\x1e': 30, '\x1f': 31}
WEEK_START_DAY_TO_DOW = {'MONDAY': 1, 'TUESDAY': 2, 'WEDNESDAY': 3, 'THURSDAY': 4, 'FRIDAY': 5, 'SATURDAY': 6, 'SUNDAY': 7}
MAX_BIT_POSITION = Literal(this=32768, is_string=False)
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (<class 'sqlglot.expressions.core.Binary'>, <class 'sqlglot.expressions.core.Bracket'>, <class 'sqlglot.expressions.core.In'>, <class 'sqlglot.expressions.core.Not'>)
class DuckDBGenerator(generator.Generator):
    """SQL generator for the DuckDB dialect.

    The class-level flags below toggle keyword spellings and feature support
    that differ from the base ``generator.Generator`` defaults.
    """

    # DuckDB uses $1 / $name style parameters and placeholders.
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    # Struct literals render as ROW(...) style parentheses rather than braces.
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    # DuckDB spells TABLESAMPLE as "USING SAMPLE ... REPEATABLE(seed)".
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    IGNORE_NULLS_BEFORE_ORDER = False
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    SELECT_KINDS: tuple[str, ...] = ()
    SUPPORTS_DECODE_CASE = False
    SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False

    AFTER_HAVING_MODIFIER_TRANSFORMS = generator.AFTER_HAVING_MODIFIER_TRANSFORMS
    SUPPORTS_WINDOW_EXCLUDE = True
    COPY_HAS_INTO_KEYWORD = False
    # DuckDB uses SELECT * EXCLUDE (...) instead of EXCEPT (...).
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_SIZE_DIM_REQUIRED: bool | None = False
    NORMALIZE_EXTRACT_DATE_PARTS = True
    SUPPORTS_LIKE_QUANTIFIERS = False
    SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
    # Maps AST expression types to DuckDB-specific rendering callables,
    # layered on top of the base generator's transforms.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: _anyvalue_sql,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Boolnot: _boolnot_sql,
        exp.Booland: _booland_sql,
        exp.Boolor: _boolor_sql,
        exp.Array: transforms.preprocess(
            [transforms.inherit_struct_field_names],
            generator=inline_array_unless_query,
        ),
        exp.ArrayAppend: array_append_sql("LIST_APPEND"),
        exp.ArrayCompact: array_compact_sql,
        exp.ArrayConstructCompact: lambda self, e: self.sql(
            exp.ArrayCompact(this=exp.Array(expressions=e.expressions))
        ),
        exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayOverlaps: _array_overlaps_sql,
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArrayInsert: _array_insert_sql,
        # Snowflake-style zero-based ARRAY_POSITION is emulated by subtracting 1
        # from DuckDB's 1-based result.
        exp.ArrayPosition: lambda self, e: (
            self.sql(
                exp.Sub(
                    this=exp.ArrayPosition(this=e.this, expression=e.expression),
                    expression=exp.Literal.number(1),
                )
            )
            if e.args.get("zero_based")
            else self.func("ARRAY_POSITION", e.this, e.expression)
        ),
        exp.ArrayRemoveAt: _array_remove_at_sql,
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySort: _array_sort_sql,
        exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.ArrayMax: rename_func("LIST_MAX"),
        exp.ArrayMin: rename_func("LIST_MIN"),
        exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False),
        exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True),
        exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
        exp.BitwiseAndAgg: _bitwise_agg_sql,
        exp.BitwiseCount: rename_func("BIT_COUNT"),
        exp.BitwiseLeftShift: _bitshift_sql,
        exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
        exp.BitwiseOrAgg: _bitwise_agg_sql,
        exp.BitwiseRightShift: _bitshift_sql,
        exp.BitwiseXorAgg: _bitwise_agg_sql,
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.Corr: lambda self, e: self._corr_sql(e),
        exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentSchemas: lambda self, e: self.func(
            "current_schemas", e.this if e.this else exp.true()
        ),
        # SYSDATE-style calls are pinned to UTC; otherwise plain CURRENT_TIMESTAMP.
        exp.CurrentTimestamp: lambda self, e: (
            self.sql(
                exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
            )
            if e.args.get("sysdate")
            else "CURRENT_TIMESTAMP"
        ),
        exp.CurrentVersion: rename_func("version"),
        exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"),
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        # Abbreviated day/month names go through STRFTIME; full names use the
        # native DAYNAME/MONTHNAME functions.
        exp.Dayname: lambda self, e: (
            self.func("STRFTIME", e.this, exp.Literal.string("%a"))
            if e.args.get("abbreviated")
            else self.func("DAYNAME", e.this)
        ),
        exp.Monthname: lambda self, e: (
            self.func("STRFTIME", e.this, exp.Literal.string("%b"))
            if e.args.get("abbreviated")
            else self.func("MONTHNAME", e.this)
        ),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_to_binary_interval_op(),
        exp.DateFromParts: _date_from_parts_sql,
        exp.DateSub: _date_delta_to_binary_interval_op(),
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeDiff: _date_diff_sql,
        exp.DatetimeSub: _date_delta_to_binary_interval_op(),
        exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
        exp.DateToDi: lambda self, e: (
            f"CAST(STRFTIME({self.sql(e, 'this')}, {self.dialect.DATEINT_FORMAT}) AS INT)"
        ),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self, e: (
            f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {self.dialect.DATEINT_FORMAT}) AS DATE)"
        ),
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.EqualNull: lambda self, e: self.sql(
            exp.NullSafeEQ(this=e.this, expression=e.expression)
        ),
        exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateSeries: generate_series_sql("GENERATE_SERIES", "RANGE"),
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Getbit: getbit_sql,
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
        exp.Explode: rename_func("UNNEST"),
        exp.IcebergProperty: lambda *_: "",
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        # JSON type predicates are emulated via JSON_TYPE(...) = '<TYPE>'.
        exp.IsNullValue: lambda self, e: self.sql(
            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL"))
        ),
        exp.IsArray: lambda self, e: self.sql(
            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY"))
        ),
        exp.Ceil: _ceil_floor,
        exp.Floor: _ceil_floor,
        exp.JSONBExists: rename_func("JSON_EXISTS"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONFormat: _json_format_sql,
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Lateral: _explode_to_unnest_sql,
        exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)),
        exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)),
        exp.Select: transforms.preprocess([_seq_to_range_in_generator]),
        exp.Seq1: lambda self, e: _seq_sql(self, e, 1),
        exp.Seq2: lambda self, e: _seq_sql(self, e, 2),
        exp.Seq4: lambda self, e: _seq_sql(self, e, 4),
        exp.Seq8: lambda self, e: _seq_sql(self, e, 8),
        exp.BoolxorAgg: _boolxor_agg_sql,
        exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
        exp.Initcap: _initcap_sql,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.SHA: lambda self, e: _sha_sql(self, e, "SHA1"),
        exp.SHA1Digest: lambda self, e: _sha_sql(self, e, "SHA1", is_binary=True),
        exp.SHA2: lambda self, e: _sha_sql(self, e, "SHA256"),
        exp.SHA2Digest: lambda self, e: _sha_sql(self, e, "SHA256", is_binary=True),
        exp.MonthsBetween: months_between_sql,
        exp.NextDay: _day_navigation_sql,
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.PreviousDay: _day_navigation_sql,
        exp.RegexpILike: lambda self, e: self.func(
            "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
        ),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.RegrValx: _regr_val_sql,
        exp.RegrValy: _regr_val_sql,
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: _date_delta_to_binary_interval_op(),
        exp.TimeSub: _date_delta_to_binary_interval_op(),
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_to_binary_interval_op(),
        # Note the operand order: DATE_DIFF(unit, start, end).
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampSub: _date_delta_to_binary_interval_op(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DType.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.ToBoolean: _to_boolean_sql,
        exp.ToVariant: lambda self, e: self.sql(
            exp.cast(e.this, exp.DataType.build("VARIANT", dialect="duckdb"))
        ),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self, e: (
            f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)"
        ),
        exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DType.TIMESTAMP),
            exp.cast(e.this, exp.DType.TIMESTAMP),
        ),
        exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
        exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
        exp.UnixSeconds: lambda self, e: self.sql(
            exp.cast(self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DType.BIGINT)
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DType.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.YearOfWeek: lambda self, e: self.sql(
            exp.Extract(
                this=exp.Var(this="ISOYEAR"),
                expression=e.this,
            )
        ),
        exp.YearOfWeekIso: lambda self, e: self.sql(
            exp.Extract(
                this=exp.Var(this="ISOYEAR"),
                expression=e.this,
            )
        ),
        exp.Xor: _xor_sql,
        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.DateBin: rename_func("TIME_BUCKET"),
        exp.LastDay: _last_day_sql,
    }
1717
    # JSON path syntax elements that DuckDB can express natively; anything else
    # is handled by the base generator's fallback logic.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    # Type-name overrides applied when emitting casts and DDL for DuckDB.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DType.BINARY: "BLOB",
        exp.DType.BPCHAR: "TEXT",
        exp.DType.CHAR: "TEXT",
        exp.DType.DATETIME: "TIMESTAMP",
        exp.DType.DECFLOAT: "DECIMAL(38, 5)",
        exp.DType.FLOAT: "REAL",
        exp.DType.JSONB: "JSON",
        exp.DType.NCHAR: "TEXT",
        exp.DType.NVARCHAR: "TEXT",
        exp.DType.UINT: "UINTEGER",
        exp.DType.VARBINARY: "BLOB",
        exp.DType.ROWVERSION: "BLOB",
        exp.DType.VARCHAR: "TEXT",
        exp.DType.TIMESTAMPLTZ: "TIMESTAMPTZ",
        exp.DType.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DType.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DType.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DType.TIMESTAMP_NS: "TIMESTAMP_NS",
        exp.DType.BIGDECIMAL: "DECIMAL(38, 5)",
    }
1747
    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    # Identifiers in this set are always quoted when generated. The "_p"
    # suffixes mirror the libpg_query grammar token names verbatim.
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    # Interval values of these node types can be emitted without quoting/wrapping.
    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
1830
    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION = {
        # Default every known property to UNSUPPORTED, then whitelist below.
        **{
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        },
        exp.LikeProperty: exp.Properties.Location.POST_SCHEMA,
        exp.TemporaryProperty: exp.Properties.Location.POST_CREATE,
        exp.ReturnsProperty: exp.Properties.Location.POST_ALIAS,
        exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION,
        exp.IcebergProperty: exp.Properties.Location.POST_CREATE,
    }

    # Window functions whose IGNORE/RESPECT NULLS modifier is dropped;
    # defined at module level and shared here.
    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: t.ClassVar = _IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS
1848
    # Template for ZIPF transpilation - placeholders get replaced with actual parameters
    ZIPF_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        WITH rand AS (SELECT :random_expr AS r),
        weights AS (
            SELECT i, 1.0 / POWER(i, :s) AS w
            FROM RANGE(1, :n + 1) AS t(i)
        ),
        cdf AS (
            SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
            FROM weights
        )
        SELECT MIN(i)
        FROM cdf
        WHERE p >= (SELECT r FROM rand)
        """
    )

    # Template for NORMAL transpilation using Box-Muller transform
    # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
    # GREATEST(:u1, 1e-10) guards against LN(0).
    NORMAL_TEMPLATE: exp.Expr = exp.maybe_parse(
        ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
    )

    # Template for generating a seeded pseudo-random value in [0, 1) from a hash
    # NOTE: hash-based and deterministic for a fixed :seed — not cryptographic.
    SEEDED_RANDOM_TEMPLATE: exp.Expr = exp.maybe_parse("(ABS(HASH(:seed)) % 1000000) / 1000000.0")

    # Template for generating signed and unsigned SEQ values within a specified range
    SEQ_UNSIGNED: exp.Expr = _SEQ_UNSIGNED
    SEQ_SIGNED: exp.Expr = _SEQ_SIGNED

    # Template for MAP_CAT transpilation - Snowflake semantics:
    # 1. Returns NULL if either input is NULL
    # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k]))
    # 3. Filters out entries with NULL values from the result
    MAPCAT_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL
            ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(
                LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))),
                __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k]))
            ), __x -> __x.value IS NOT NULL))
        END
        """
    )

    # Mappings for EXTRACT/DATE_PART transpilation
    # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes.
    # Values are (strftime format code, SQL type the result is cast to).
    EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {
        "WEEKISO": ("%V", "INTEGER"),
        "YEAROFWEEK": ("%G", "INTEGER"),
        "YEAROFWEEKISO": ("%G", "INTEGER"),
        "NANOSECOND": ("%n", "BIGINT"),
    }

    # Maps epoch-based specifiers to DuckDB epoch functions
    EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {
        "EPOCH_SECOND": "EPOCH",
        "EPOCH_MILLISECOND": "EPOCH_MS",
        "EPOCH_MICROSECOND": "EPOCH_US",
        "EPOCH_NANOSECOND": "EPOCH_NS",
    }

    # Template for BITMAP_CONSTRUCT_AGG transpilation
    #
    # BACKGROUND:
    # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap.
    # Supports values in range 0-32767, this version returns NULL if any value is out of range
    # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
    # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
    #
    # Snowflake uses two different formats based on the number of unique values:
    #
    # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
    #   Bytes 0-1: Count of values as 2-byte big-endian integer (e.g., 3 values = 0x0003)
    #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
    #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
    #                                count  v1   v2   v3   pad
    #
    # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
    #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
    #   Bytes 10+: Each value as 2-byte little-endian integer (no padding)
    #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
    #                                  hdr  ----9 zero bytes----  v1   v2   v3   v4   v5
    #
    # TEMPLATE STRUCTURE
    #
    # Phase 1 - Innermost subquery: Data preparation
    #   SELECT LIST_SORT(...) AS l
    #   - Aggregates all input values into a list, remove NULLs, duplicates and sorts
    #   Result: Clean, sorted list of unique non-null integers stored as 'l'
    #
    # Phase 2 - Middle subquery: Hex string construction
    #   LIST_TRANSFORM(...)
    #   - Converts each integer to 2-byte little-endian hex representation
    #   - & 255 extracts low byte, >> 8 extracts high byte
    #   - LIST_REDUCE: Concatenates all hex pairs into single string 'h'
    #   Result: Hex string of all values
    #
    # Phase 3 - Outer SELECT: Final bitmap assembly
    #   LENGTH(l) < 5:
    #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
    #   LENGTH(l) >= 5:
    #   - Large format: Fixed 10-byte header + values (no padding needed)
    #   Result: Complete binary bitmap as BLOB
    #
    BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CASE
            WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
            WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
            WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
            ELSE UNHEX('08000000000000000000' || h)
        END
        FROM (
            SELECT l, COALESCE(LIST_REDUCE(
                LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
                (__a, __b) -> __a || __b, ''
            ), '') AS h
            FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
        )
        """
    )

    # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
    # The char pool (62 characters) is interpolated at class-definition time.
    RANDSTR_TEMPLATE: exp.Expr = exp.maybe_parse(
        f"""
        SELECT LISTAGG(
            SUBSTRING(
                '{RANDSTR_CHAR_POOL}',
                1 + CAST(FLOOR(random_value * 62) AS INT),
                1
            ),
            ''
        )
        FROM (
            SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
            FROM RANGE(:length) AS t(i)
        )
        """,
    )

    # Template for MINHASH transpilation
    # Computes k minimum hash values across aggregated data using DuckDB list functions
    # Returns JSON matching Snowflake format: {"state": [...], "type": "minhash", "version": 1}
    MINHASH_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h
            FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed)
        )
        """,
    )

    # Template for MINHASH_COMBINE transpilation
    # Combines multiple minhash signatures by taking element-wise minimum
    MINHASH_COMBINE_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT
                pos AS idx,
                MIN(val) AS min_h
            FROM
                UNNEST(LIST(:expr)) AS _(sig),
                UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos)
            GROUP BY pos
        )
        """,
    )

    # Template for APPROXIMATE_SIMILARITY transpilation
    # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree
    APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*)
        FROM (
            SELECT pos, COUNT(DISTINCT h) AS num_distinct
            FROM (
                SELECT h, pos
                FROM UNNEST(LIST(:expr)) AS _(sig),
                     UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos)
            )
            GROUP BY pos
        )
        """,
    )

    # Template for ARRAYS_ZIP transpilation
    # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest
    # Uses RANGE + indexing to match Snowflake behavior
    ARRAYS_ZIP_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :null_check THEN NULL
        WHEN :all_empty_check THEN [:empty_struct]
        ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct)
        END
        """,
    )

    # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
    # Each element is paired with its 1-based position via LIST_ZIP, then filtered
    # by a comparison operator (supplied via :cond) that determines the operation:
    #   EXCEPT (>):        keep the N-th occurrence only if N > count in arr2
    #                      e.g. [2,2,2] EXCEPT [2,2] -> [2]
    #   INTERSECTION (<=): keep the N-th occurrence only if N <= count in arr2
    #                      e.g. [2,2,2] INTERSECT [2,2] -> [2,2]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_BAG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_TRANSFORM(
                LIST_FILTER(
                    LIST_ZIP(:arr1, GENERATE_SERIES(1, LEN(:arr1))),
                    pair -> :cond
                ),
                pair -> pair[0]
            )
        END
        """
    )

    # Condition plugged into ARRAY_BAG_TEMPLATE for bag-semantics EXCEPT.
    ARRAY_EXCEPT_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " > LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    # Condition plugged into ARRAY_BAG_TEMPLATE for bag-semantics INTERSECTION.
    ARRAY_INTERSECTION_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " <= LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    # Set semantics for ARRAY_EXCEPT. Deduplicates arr1 via LIST_DISTINCT, then
    # filters out any element that appears at least once in arr2.
    #   e.g. [1,1,2,3] EXCEPT [1] -> [2,3]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_EXCEPT_SET_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_FILTER(
                LIST_DISTINCT(:arr1),
                e -> LEN(LIST_FILTER(:arr2, x -> x IS NOT DISTINCT FROM e)) = 0
            )
        END
        """
    )

    # Template for STRTOK function transpilation
    #
    # DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
    # We may need to adjust this if we want to support transpilation from other dialects
    #
    # CASE
    #     -- Snowflake: empty delimiter + empty input string -> NULL
    #     WHEN delimiter = '' AND input_str = '' THEN NULL
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return input string if index is 1
    #     WHEN delimiter = '' AND index = 1 THEN input_str
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return NULL if index is not 1
    #     WHEN delimiter = '' THEN NULL
    #
    #     -- Snowflake: negative indices return NULL
    #     WHEN index < 0 THEN NULL
    #
    #     -- Snowflake: return NULL if any argument is NULL
    #     WHEN input_str IS NULL OR delimiter IS NULL OR index IS NULL THEN NULL
    #
    #
    #     ELSE LIST_FILTER(
    #         REGEXP_SPLIT_TO_ARRAY(
    #             input_str,
    #             CASE
    #                 -- if delimiter is '', we don't want to surround it with '[' and ']' as '[]' is invalid for DuckDB
    #                 WHEN delimiter = '' THEN ''
    #
    #                 -- handle problematic regex characters in delimiter with REGEXP_REPLACE
    #                 -- turn delimiter into a regex char set, otherwise DuckDB will match in order, which we don't want
    #                 ELSE '[' || REGEXP_REPLACE(delimiter, problematic_char_set, '\\\1', 'g') || ']'
    #             END
    #         ),
    #
    #         -- Snowflake: don't return empty strings
    #         x -> NOT x = ''
    #     )[index]
    # END
    STRTOK_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :delimiter = '' AND :string = '' THEN NULL
            WHEN :delimiter = '' AND :part_index = 1 THEN :string
            WHEN :delimiter = '' THEN NULL
            WHEN :part_index < 0 THEN NULL
            WHEN :string IS NULL OR :delimiter IS NULL OR :part_index IS NULL THEN NULL
            ELSE :base_func
        END
        """
    )
2151
2152    def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str:
2153        cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2))
2154        return self.sql(
2155            exp.replace_placeholders(self.ARRAY_BAG_TEMPLATE, arr1=arr1, arr2=arr2, cond=cond)
2156        )
2157
2158    def timeslice_sql(self, expression: exp.TimeSlice) -> str:
2159        """
2160        Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.
2161
2162        Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
2163        DuckDB:    time_bucket(INTERVAL 'slice_length' UNIT, date_expr)
2164
2165        For 'END' kind, add the interval to get the end of the slice.
2166        For DATE type with 'END', cast result back to DATE to preserve type.
2167        """
2168        date_expr = expression.this
2169        slice_length = expression.expression
2170        unit = expression.unit
2171        kind = expression.text("kind").upper()
2172
2173        # Create INTERVAL expression: INTERVAL 'N' UNIT
2174        interval_expr = exp.Interval(this=slice_length, unit=unit)
2175
2176        # Create base time_bucket expression
2177        time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)
2178
2179        # Check if we need the end of the slice (default is start)
2180        if not kind == "END":
2181            # For 'START', return time_bucket directly
2182            return self.sql(time_bucket_expr)
2183
2184        # For 'END', add the interval to get end of slice
2185        add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())
2186
2187        # If input is DATE type, cast result back to DATE to preserve type
2188        # DuckDB converts DATE to TIMESTAMP when adding intervals
2189        if date_expr.is_type(exp.DType.DATE):
2190            return self.sql(exp.cast(add_expr, exp.DType.DATE))
2191
2192        return self.sql(add_expr)
2193
    def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
        """
        Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

        Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
        - Each bucket covers 32,768 values
        - Bucket numbering starts at 1
        - Formula: ((value - 1) // 32768) + 1 for positive values

        For non-positive values (0 and negative), we use value // 32768 to avoid
        producing bucket 0 or positive bucket IDs for negative inputs.
        """
        value = expression.this

        # NOTE: arithmetic on exp nodes builds AST nodes (not Python numbers),
        # so these are expression trees rendered later by self.sql.
        positive_formula = ((value - 1) // 32768) + 1
        non_positive_formula = value // 32768

        # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
        case_expr = (
            exp.case()
            .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
            .else_(non_positive_formula)
        )
        return self.sql(case_expr)
2218
    def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
        """
        Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

        Snowflake's BITMAP_BIT_POSITION behavior:
        - For n <= 0: returns ABS(n) % 32768
        - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
        """
        this = expression.this

        # Emits (IF(n > 0, n - 1, ABS(n))) % MAX_BIT_POSITION; the Paren keeps
        # the modulo applied to the whole conditional.
        # MAX_BIT_POSITION is a module-level constant defined outside this view
        # — presumably the 32768 bucket width; confirm against its definition.
        return self.sql(
            exp.Mod(
                this=exp.Paren(
                    this=exp.If(
                        this=exp.GT(this=this, expression=exp.Literal.number(0)),
                        true=this - exp.Literal.number(1),
                        false=exp.Abs(this=this),
                    )
                ),
                expression=MAX_BIT_POSITION,
            )
        )
2241
2242    def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
2243        """
2244        Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
2245        Uses a pre-parsed template with placeholders replaced by expression nodes.
2246
2247        Snowflake bitmap format:
2248        - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
2249        - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
2250        """
2251        arg = expression.this
2252        return (
2253            f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
2254        )
2255
2256    def compress_sql(self, expression: exp.Compress) -> str:
2257        self.unsupported("DuckDB does not support the COMPRESS() function")
2258        return self.function_fallback_sql(expression)
2259
2260    def encrypt_sql(self, expression: exp.Encrypt) -> str:
2261        self.unsupported("ENCRYPT is not supported in DuckDB")
2262        return self.function_fallback_sql(expression)
2263
2264    def decrypt_sql(self, expression: exp.Decrypt) -> str:
2265        func_name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
2266        self.unsupported(f"{func_name} is not supported in DuckDB")
2267        return self.function_fallback_sql(expression)
2268
2269    def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
2270        func_name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
2271        self.unsupported(f"{func_name} is not supported in DuckDB")
2272        return self.function_fallback_sql(expression)
2273
2274    def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
2275        self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
2276        return self.function_fallback_sql(expression)
2277
2278    def parseurl_sql(self, expression: exp.ParseUrl) -> str:
2279        self.unsupported("PARSE_URL is not supported in DuckDB")
2280        return self.function_fallback_sql(expression)
2281
2282    def parseip_sql(self, expression: exp.ParseIp) -> str:
2283        self.unsupported("PARSE_IP is not supported in DuckDB")
2284        return self.function_fallback_sql(expression)
2285
2286    def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
2287        this = expression.this
2288        expr = expression.expression
2289
2290        if expression.args.get("case_insensitive"):
2291            this = exp.Upper(this=this)
2292            expr = exp.Upper(this=expr)
2293
2294        result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)
2295
2296        if expression.args.get("integer_scale"):
2297            result = exp.cast(result * 100, "INTEGER")
2298
2299        return self.sql(result)
2300
2301    def nthvalue_sql(self, expression: exp.NthValue) -> str:
2302        from_first = expression.args.get("from_first", True)
2303        if not from_first:
2304            self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end ")
2305
2306        return self.function_fallback_sql(expression)
2307
2308    def randstr_sql(self, expression: exp.Randstr) -> str:
2309        """
2310        Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
2311        Uses a pre-parsed template with placeholders replaced by expression nodes.
2312
2313        RANDSTR(length, generator) generates a random string of specified length.
2314        - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
2315        - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
2316        - No generator: Use default seed value
2317        """
2318        length = expression.this
2319        generator = expression.args.get("generator")
2320
2321        if generator:
2322            if isinstance(generator, exp.Rand):
2323                # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
2324                seed_value = generator.this or generator
2325            else:
2326                # Const/int or other expression - use as seed directly
2327                seed_value = generator
2328        else:
2329            # No generator specified, use default seed (arbitrary but deterministic)
2330            seed_value = exp.Literal.number(RANDSTR_SEED)
2331
2332        replacements = {"seed": seed_value, "length": length}
2333        return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
2334
2335    @unsupported_args("finish")
2336    def reduce_sql(self, expression: exp.Reduce) -> str:
2337        array_arg = expression.this
2338        initial_value = expression.args.get("initial")
2339        merge_lambda = expression.args.get("merge")
2340
2341        if merge_lambda:
2342            merge_lambda.set("colon", True)
2343
2344        return self.func("list_reduce", array_arg, merge_lambda, initial_value)
2345
    def zipf_sql(self, expression: exp.Zipf) -> str:
        """
        Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
        Uses a pre-parsed template with placeholders replaced by expression nodes.
        """
        s = expression.this
        n = expression.args["elementcount"]
        gen = expression.args["gen"]

        if not isinstance(gen, exp.Rand):
            # Seeded generator: derive a deterministic uniform value in [0, 1):
            # (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expr = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            # Use RANDOM() for non-deterministic output
            random_expr = exp.Rand()

        replacements = {"s": s, "n": n, "random_expr": random_expr}
        # Parenthesize so the expanded template composes safely with callers.
        return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
2372
2373    def tobinary_sql(self, expression: exp.ToBinary) -> str:
2374        """
2375        TO_BINARY and TRY_TO_BINARY transpilation:
2376        - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
2377        - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
2378        - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')
2379
2380        For TRY_TO_BINARY (safe=True), wrap with TRY():
2381        - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
2382        """
2383        value = expression.this
2384        format_arg = expression.args.get("format")
2385        is_safe = expression.args.get("safe")
2386        is_binary = _is_binary(expression)
2387
2388        if not format_arg and not is_binary:
2389            func_name = "TRY_TO_BINARY" if is_safe else "TO_BINARY"
2390            return self.func(func_name, value)
2391
2392        # Snowflake defaults to HEX encoding when no format is specified
2393        fmt = format_arg.name.upper() if format_arg else "HEX"
2394
2395        if fmt in ("UTF-8", "UTF8"):
2396            # DuckDB ENCODE always uses UTF-8, no charset parameter needed
2397            result = self.func("ENCODE", value)
2398        elif fmt == "BASE64":
2399            result = self.func("FROM_BASE64", value)
2400        elif fmt == "HEX":
2401            result = self.func("UNHEX", value)
2402        else:
2403            if is_safe:
2404                return self.sql(exp.null())
2405            else:
2406                self.unsupported(f"format {fmt} is not supported")
2407                result = self.func("TO_BINARY", value)
2408        return f"TRY({result})" if is_safe else result
2409
2410    def tonumber_sql(self, expression: exp.ToNumber) -> str:
2411        fmt = expression.args.get("format")
2412        precision = expression.args.get("precision")
2413        scale = expression.args.get("scale")
2414
2415        if not fmt and precision and scale:
2416            return self.sql(
2417                exp.cast(
2418                    expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb"
2419                )
2420            )
2421
2422        return super().tonumber_sql(expression)
2423
2424    def _greatest_least_sql(self, expression: exp.Greatest | exp.Least) -> str:
2425        """
2426        Handle GREATEST/LEAST functions with dialect-aware NULL behavior.
2427
2428        - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
2429        - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
2430        """
2431        # Get all arguments
2432        all_args = [expression.this, *expression.expressions]
2433        fallback_sql = self.function_fallback_sql(expression)
2434
2435        if expression.args.get("ignore_nulls"):
2436            # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
2437            return self.sql(fallback_sql)
2438
2439        # return NULL if any argument is NULL
2440        case_expr = exp.case().when(
2441            exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
2442            exp.null(),
2443            copy=False,
2444        )
2445        case_expr.set("default", fallback_sql)
2446        return self.sql(case_expr)
2447
2448    def generator_sql(self, expression: exp.Generator) -> str:
2449        # Transpile Snowflake GENERATOR to DuckDB range()
2450        rowcount = expression.args.get("rowcount")
2451        time_limit = expression.args.get("time_limit")
2452
2453        if time_limit:
2454            self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")
2455
2456        if not rowcount:
2457            self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
2458            return self.func("range", exp.Literal.number(0))
2459
2460        return self.func("range", rowcount)
2461
2462    def greatest_sql(self, expression: exp.Greatest) -> str:
2463        return self._greatest_least_sql(expression)
2464
2465    def least_sql(self, expression: exp.Least) -> str:
2466        return self._greatest_least_sql(expression)
2467
2468    def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
2469        if expression.args.get("colon"):
2470            prefix = "LAMBDA "
2471            arrow_sep = ":"
2472            wrap = False
2473        else:
2474            prefix = ""
2475
2476        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
2477        return f"{prefix}{lambda_sql}"
2478
2479    def show_sql(self, expression: exp.Show) -> str:
2480        from_ = self.sql(expression, "from_")
2481        from_ = f" FROM {from_}" if from_ else ""
2482        return f"SHOW {expression.name}{from_}"
2483
2484    def soundex_sql(self, expression: exp.Soundex) -> str:
2485        self.unsupported("SOUNDEX is not supported in DuckDB")
2486        return self.func("SOUNDEX", expression.this)
2487
    def sortarray_sql(self, expression: exp.SortArray) -> str:
        """
        Render SORT_ARRAY via DuckDB's LIST_SORT / ARRAY_REVERSE_SORT.

        `asc` and `nulls_first` may be literal booleans or arbitrary expressions;
        only literal booleans can be folded into LIST_SORT's string arguments.
        """
        arr = expression.this
        asc = expression.args.get("asc")
        nulls_first = expression.args.get("nulls_first")

        # Neither flag is a literal boolean: pass both through untouched.
        if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first)

        nulls_are_first = nulls_first == exp.true()
        nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

        # Dynamic `asc` expression: keep it, but fold the literal NULLS flag.
        if not isinstance(asc, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first_sql)

        descending = asc == exp.false()

        # Both flags are literal booleans: pick the simplest equivalent call.
        if not descending and not nulls_are_first:
            return self.func("LIST_SORT", arr)
        if not nulls_are_first:
            return self.func("ARRAY_REVERSE_SORT", arr)
        return self.func(
            "LIST_SORT",
            arr,
            exp.Literal.string("DESC" if descending else "ASC"),
            exp.Literal.string("NULLS FIRST"),
        )
2514
2515    def install_sql(self, expression: exp.Install) -> str:
2516        force = "FORCE " if expression.args.get("force") else ""
2517        this = self.sql(expression, "this")
2518        from_clause = expression.args.get("from_")
2519        from_clause = f" FROM {from_clause}" if from_clause else ""
2520        return f"{force}INSTALL {this}{from_clause}"
2521
2522    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
2523        self.unsupported(
2524            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
2525        )
2526        return self.function_fallback_sql(expression)
2527
2528    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
2529        return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))
2530
    def strposition_sql(self, expression: exp.StrPosition) -> str:
        """
        Render STRPOS/POSITION, with special handling for BLOB inputs and for
        dialects that clamp non-positive start positions to 1.
        """
        this = expression.this
        substr = expression.args.get("substr")
        position = expression.args.get("position")

        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types
        # Convert to HEX strings, use STRPOS, then convert hex position to byte position
        if _is_binary(this):
            # Build expression: STRPOS(HEX(haystack), HEX(needle))
            hex_strpos = exp.StrPosition(
                this=exp.Hex(this=this),
                substr=exp.Hex(this=substr),
            )

            # Each byte renders as two hex chars: (hex_pos + 1) / 2 recovers
            # the 1-based byte offset (and keeps 0 for "not found").
            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

        # For VARCHAR: handle clamp_position
        if expression.args.get("clamp_position") and position:
            expression = expression.copy()
            expression.set(
                "position",
                exp.If(
                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                    true=exp.Literal.number(1),
                    false=position.copy(),
                ),
            )

        # Delegate to the shared dialect helper (module-level function that this
        # method intentionally shadows) for the plain-string case.
        return strposition_sql(self, expression)
2560
2561    def substring_sql(self, expression: exp.Substring) -> str:
2562        if expression.args.get("zero_start"):
2563            start = expression.args.get("start")
2564            length = expression.args.get("length")
2565
2566            if start := expression.args.get("start"):
2567                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
2568            if length := expression.args.get("length"):
2569                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)
2570
2571            return self.func("SUBSTRING", expression.this, start, length)
2572
2573        return self.function_fallback_sql(expression)
2574
    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        """
        Render STR_TO_TIME as (TRY_)STRPTIME, casting to TIMESTAMPTZ when the
        requested target type carries a time zone (LTZ/TZ variants).
        """
        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DType.TIMESTAMPLTZ,
            exp.DType.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
            # NOTE: self.func returns rendered SQL; exp.cast accepts it as a
            # string operand here.
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
                )
            )
        return base_sql
2599
2600    def strtodate_sql(self, expression: exp.StrToDate) -> str:
2601        formatted_time = self.format_time(expression)
2602        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
2603        return self.sql(
2604            exp.cast(
2605                self.func(function_name, expression.this, formatted_time),
2606                exp.DataType(this=exp.DType.DATE),
2607            )
2608        )
2609
2610    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
2611        this = expression.this
2612        time_format = self.format_time(expression)
2613        safe = expression.args.get("safe")
2614        time_type = exp.DataType.build("TIME", dialect="duckdb")
2615        cast_expr = exp.TryCast if safe else exp.Cast
2616
2617        if time_format:
2618            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
2619            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
2620            return self.sql(cast_expr(this=strptime, to=time_type))
2621
2622        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
2623            return self.sql(this)
2624
2625        return self.sql(cast_expr(this=this, to=time_type))
2626
2627    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
2628        if not expression.this:
2629            return "CURRENT_DATE"
2630
2631        expr = exp.Cast(
2632            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
2633            to=exp.DataType(this=exp.DType.DATE),
2634        )
2635        return self.sql(expr)
2636
2637    def checkjson_sql(self, expression: exp.CheckJson) -> str:
2638        arg = expression.this
2639        return self.sql(
2640            exp.case()
2641            .when(
2642                exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)),
2643                exp.null(),
2644            )
2645            .else_(exp.Literal.string("Invalid JSON"))
2646        )
2647
2648    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
2649        arg = expression.this
2650        if expression.args.get("safe"):
2651            return self.sql(
2652                exp.case()
2653                .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
2654                .else_(exp.null())
2655            )
2656        return self.func("JSON", arg)
2657
2658    def unicode_sql(self, expression: exp.Unicode) -> str:
2659        if expression.args.get("empty_is_zero"):
2660            return self.sql(
2661                exp.case()
2662                .when(expression.this.eq(exp.Literal.string("")), exp.Literal.number(0))
2663                .else_(exp.Anonymous(this="UNICODE", expressions=[expression.this]))
2664            )
2665
2666        return self.func("UNICODE", expression.this)
2667
2668    def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str:
2669        return self.sql(
2670            exp.case()
2671            .when(exp.func("json_type", expression.this).eq("NULL"), exp.null())
2672            .else_(expression.this)
2673        )
2674
2675    def trunc_sql(self, expression: exp.Trunc) -> str:
2676        decimals = expression.args.get("decimals")
2677        if (
2678            expression.args.get("fractions_supported")
2679            and decimals
2680            and not decimals.is_type(exp.DType.INT)
2681        ):
2682            decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")
2683
2684        return self.func("TRUNC", expression.this, decimals)
2685
    def normal_sql(self, expression: exp.Normal) -> str:
        """
        Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

        Uses the Box-Muller transform via NORMAL_TEMPLATE.
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expr = expression.args["gen"]

        # Build two uniform random values [0, 1) for Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            # Unseeded RANDOM(): two independent non-deterministic draws.
            u1: exp.Expr = exp.Rand()
            u2: exp.Expr = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            # (seed and seed + 1) so u1/u2 differ but remain deterministic.
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
2711
    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        gen = expression.args.get("gen")

        # Determine if result should be integer (both bounds are integers).
        # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expr = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            # +1 makes the integer upper bound inclusive after FLOOR below.
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expr = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        if is_int_result:
            # FLOOR + BIGINT cast yields an integer result in [min, max].
            result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

        return self.sql(result)
2757
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """
        Render TIME_FROM_PARTS, emulating Snowflake's overflow semantics.

        With overflow enabled, out-of-range components roll over via interval
        arithmetic on '00:00:00'; otherwise MAKE_TIME is used directly.
        """
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    # Literal couldn't be converted; fall through to intervals.
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                # pop() detaches nano so it isn't also rendered as an argument.
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME
        if nano:
            # Fold nanoseconds into fractional seconds before rendering.
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
2801
    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies server timezone while DuckDB uses local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        # Specifiers emulated via STRFTIME formats plus a cast.
        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
            # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.build(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.build(cast_type, dialect="duckdb"),
                )
            )

        # Epoch-style specifiers map to dedicated DuckDB functions.
        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns float, cast to BIGINT for integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.build("BIGINT", dialect="duckdb"))
            return self.sql(result)

        return super().extract_sql(expression)
2866
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """
        Render TIMESTAMP_FROM_PARTS in either of its two forms.

        The two-argument form (date_expr, time_expr) becomes DATE + TIME; the
        component form maps to MAKE_TIMESTAMP with milli/nano folded into
        fractional seconds.
        """
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        milli = expression.args.get("milli")
        if milli is not None:
            # pop() detaches the node so MAKE_TIMESTAMP doesn't also receive it.
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        # Only rewrite `sec` when a sub-second component was actually folded in.
        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
2894
2895    @unsupported_args("nano")
2896    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
2897        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2898        if nano := expression.args.get("nano"):
2899            nano.pop()
2900
2901        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2902        return f"CAST({timestamp} AS TIMESTAMPTZ)"
2903
2904    @unsupported_args("nano")
2905    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
2906        # Extract zone before popping
2907        zone = expression.args.get("zone")
2908        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2909        if zone:
2910            zone = zone.pop()
2911
2912        if nano := expression.args.get("nano"):
2913            nano.pop()
2914
2915        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2916
2917        if zone:
2918            # Use AT TIME ZONE to apply the explicit timezone
2919            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"
2920
2921        return timestamp
2922
2923    def tablesample_sql(
2924        self,
2925        expression: exp.TableSample,
2926        tablesample_keyword: str | None = None,
2927    ) -> str:
2928        if not isinstance(expression.parent, exp.Select):
2929            # This sample clause only applies to a single source, not the entire resulting relation
2930            tablesample_keyword = "TABLESAMPLE"
2931
2932        if expression.args.get("size"):
2933            method = expression.args.get("method")
2934            if method and method.name.upper() != "RESERVOIR":
2935                self.unsupported(
2936                    f"Sampling method {method} is not supported with a discrete sample count, "
2937                    "defaulting to reservoir sampling"
2938                )
2939                expression.set("method", exp.var("RESERVOIR"))
2940
2941        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
2942
2943    def join_sql(self, expression: exp.Join) -> str:
2944        if (
2945            not expression.args.get("using")
2946            and not expression.args.get("on")
2947            and not expression.method
2948            and (expression.kind in ("", "INNER", "OUTER"))
2949        ):
2950            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
2951            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
2952            if isinstance(expression.this, exp.Unnest):
2953                return super().join_sql(expression.on(exp.true()))
2954
2955            expression.set("side", None)
2956            expression.set("kind", None)
2957
2958        return super().join_sql(expression)
2959
2960    def countif_sql(self, expression: exp.CountIf) -> str:
2961        if self.dialect.version >= (1, 2):
2962            return self.function_fallback_sql(expression)
2963
2964        # https://github.com/tobymao/sqlglot/pull/4749
2965        return count_if_to_sum(self, expression)
2966
    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render bracket subscripts, emulating pre-1.2 DuckDB map-access semantics.

        On DuckDB >= 1.2 the default bracket generation is used as-is. On older
        versions, a subscript on a MAP yields a list, so `map[key]` is rewritten
        to `(map[key])[1]` to extract the scalar element (unless the caller set
        `returns_list_for_maps`).
        """
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            # Parenthesize array literals so the subscript binds to the whole literal
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                # Type info may be absent on a raw parse tree; annotate on demand
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                # Pre-1.2 map subscripts produce a list; take its first element
                bracket = f"({bracket})[1]"

        return bracket
2988
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """Render WITHIN GROUP clauses for DuckDB.

        ARRAY_AGG gets its ORDER BY moved inside the function call. Ordered-set
        aggregates (percentiles) have their order key promoted to the first
        argument per DuckDB's calling convention. For everything else, the ORDER
        BY text is spliced into the wrapped function's argument list just before
        its closing parenthesis.
        """
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the closing paren, splice in the ORDER BY text, and re-close
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
3030
    def length_sql(self, expression: exp.Length) -> str:
        """Render LENGTH, resolving possibly-binary inputs.

        When the source dialect allows binary args (flagged via `binary`), the
        arg's type is annotated if missing. A confirmed text type uses plain
        LENGTH; anything still ambiguous falls back to a runtime TYPEOF CASE
        that measures BLOBs in bytes and everything else as VARCHAR.
        """
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            # Type info may be absent on a raw parse tree; annotate on demand
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)
3057
3058    def bitlength_sql(self, expression: exp.BitLength) -> str:
3059        if not _is_binary(arg := expression.this):
3060            return self.func("BIT_LENGTH", arg)
3061
3062        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
3063        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))
3064
3065    def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str:
3066        arg = expression.expressions[0]
3067        if arg.is_type(*exp.DataType.REAL_TYPES):
3068            arg = exp.cast(arg, exp.DType.INT)
3069        return self.func("CHR", arg)
3070
3071    def collation_sql(self, expression: exp.Collation) -> str:
3072        self.unsupported("COLLATION function is not supported by DuckDB")
3073        return self.function_fallback_sql(expression)
3074
3075    def collate_sql(self, expression: exp.Collate) -> str:
3076        if not expression.expression.is_string:
3077            return super().collate_sql(expression)
3078
3079        raw = expression.expression.name
3080        if not raw:
3081            return self.sql(expression.this)
3082
3083        parts = []
3084        for part in raw.split("-"):
3085            lower = part.lower()
3086            if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
3087                if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
3088                    self.unsupported(
3089                        f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
3090                    )
3091                parts.append(lower)
3092
3093        if not parts:
3094            return self.sql(expression.this)
3095        return super().collate_sql(
3096            exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
3097        )
3098
3099    def _validate_regexp_flags(self, flags: exp.Expr | None, supported_flags: str) -> str | None:
3100        """
3101        Validate and filter regexp flags for DuckDB compatibility.
3102
3103        Args:
3104            flags: The flags expression to validate
3105            supported_flags: String of supported flags (e.g., "ims", "cims").
3106                            Only these flags will be returned.
3107
3108        Returns:
3109            Validated/filtered flag string, or None if no valid flags remain
3110        """
3111        if not isinstance(flags, exp.Expr):
3112            return None
3113
3114        if not flags.is_string:
3115            self.unsupported("Non-literal regexp flags are not fully supported in DuckDB")
3116            return None
3117
3118        flag_str = flags.this
3119        unsupported = set(flag_str) - set(supported_flags)
3120
3121        if unsupported:
3122            self.unsupported(
3123                f"Regexp flags {sorted(unsupported)} are not supported in this context"
3124            )
3125
3126        flag_str = "".join(f for f in flag_str if f in supported_flags)
3127        return flag_str if flag_str else None
3128
    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
        """Render REGEXP_COUNT as LENGTH(REGEXP_EXTRACT_ALL(...)).

        A start position becomes a SUBSTRING over the subject; flags ("ims" only)
        are embedded inline in the pattern since REGEXP_EXTRACT_ALL takes no flag
        argument. An empty pattern short-circuits to 0 via a CASE guard.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        parameters = expression.args.get("parameters")

        # Validate flags - only "ims" flags are supported for embedded patterns
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")

        if position:
            this = exp.Substring(this=this, start=position)

        # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument)
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character
        result = (
            exp.case()
            .when(
                exp.EQ(this=pattern, expression=exp.Literal.string("")),
                exp.Literal.number(0),
            )
            .else_(
                exp.Length(
                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
                )
            )
        )

        return self.sql(result)
3160
    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
        """Render REGEXP_REPLACE with DuckDB-compatible flags, occurrence and position.

        Occurrence and position are only honored when they are integer literals:
          * occurrence 0 means "replace all", expressed via DuckDB's "g" flag;
            occurrence > 1 has no DuckDB equivalent and is reported.
          * position > 1 is emulated by splitting the subject: the untouched
            prefix is concatenated in front of the replaced remainder.
        """
        subject = expression.this
        pattern = expression.expression
        replacement = expression.args.get("replacement") or exp.Literal.string("")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        modifiers = expression.args.get("modifiers")

        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""

        # Handle occurrence (only literals supported)
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # flag duckdb to do either all or none, single_replace check is for duckdb round trip
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            pos = position.to_py()
            # Keep characters before `pos` untouched; replace only in the tail
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)
3211
3212    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
3213        this = expression.this
3214        pattern = expression.expression
3215        flag = expression.args.get("flag")
3216
3217        if expression.args.get("full_match"):
3218            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
3219            flag = exp.Literal.string(validated_flags) if validated_flags else None
3220            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)
3221
3222        return self.func("REGEXP_MATCHES", this, pattern, flag)
3223
3224    @unsupported_args("ins_cost", "del_cost", "sub_cost")
3225    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
3226        this = expression.this
3227        expr = expression.expression
3228        max_dist = expression.args.get("max_dist")
3229
3230        if max_dist is None:
3231            return self.func("LEVENSHTEIN", this, expr)
3232
3233        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
3234        levenshtein = exp.Levenshtein(this=this, expression=expr)
3235        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
3236
    def pad_sql(self, expression: exp.Pad) -> str:
        """
        Handle RPAD/LPAD for VARCHAR and BINARY types.

        For VARCHAR: Delegate to parent class (which enforces
        PAD_FILL_PATTERN_IS_REQUIRED where applicable).
        For BINARY: Lower to
        `input || REPEAT(pad, GREATEST(0, target_len - byte_length(input)))`,
        with the operands swapped for LPAD so the padding goes on the left.
        """
        string_arg = expression.this
        # Default fill is a single space when no pattern is given
        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")

        if _is_binary(string_arg) or _is_binary(fill_arg):
            length_arg = expression.expression
            is_left = expression.args.get("is_left")

            input_len = exp.ByteLength(this=string_arg)
            chars_needed = length_arg - input_len
            # GREATEST(0, ...) guards against inputs already longer than the target
            pad_count = exp.Greatest(
                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
            )
            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)

            # LPAD puts the padding before the input; RPAD after
            left, right = string_arg, repeat_expr
            if is_left:
                left, right = right, left

            result = exp.DPipe(this=left, expression=right)
            return self.sql(result)

        # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
        return super().pad_sql(expression)
3267
3268    def minhash_sql(self, expression: exp.Minhash) -> str:
3269        k = expression.this
3270        exprs = expression.expressions
3271
3272        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
3273            self.unsupported(
3274                "MINHASH with multiple expressions or * requires manual query restructuring"
3275            )
3276            return self.func("MINHASH", k, *exprs)
3277
3278        expr = exprs[0]
3279        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
3280        return f"({self.sql(result)})"
3281
3282    def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
3283        expr = expression.this
3284        result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
3285        return f"({self.sql(result)})"
3286
3287    def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
3288        expr = expression.this
3289        result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr)
3290        return f"({self.sql(result)})"
3291
3292    def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str:
3293        return self.sql(
3294            exp.Filter(
3295                this=exp.func("LIST", exp.Distinct(expressions=[expression.this])),
3296                expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()),
3297            )
3298        )
3299
3300    def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str:
3301        self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB")
3302        return self.function_fallback_sql(expression)
3303
    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
        """Render ARRAY_DISTINCT as LIST_DISTINCT, optionally preserving one NULL.

        When `check_null` is set, a runtime IF compares the array's full size
        against LIST_COUNT; a mismatch (presumably indicating NULL entries —
        LIST_COUNT appears to skip them) re-appends a single NULL to the
        deduplicated, compacted list.
        """
        arr = expression.this
        func = self.func("LIST_DISTINCT", arr)

        if expression.args.get("check_null"):
            # Deduplicate the NULL-free array, then tack one NULL back on
            add_null_to_array = exp.func(
                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
            )
            return self.sql(
                exp.If(
                    this=exp.NEQ(
                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
                    ),
                    true=add_null_to_array,
                    false=func,
                )
            )

        return func
3323
3324    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
3325        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
3326            return self._array_bag_sql(
3327                self.ARRAY_INTERSECTION_CONDITION,
3328                expression.expressions[0],
3329                expression.expressions[1],
3330            )
3331        return self.function_fallback_sql(expression)
3332
3333    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
3334        arr1, arr2 = expression.this, expression.expression
3335        if expression.args.get("is_multiset"):
3336            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
3337        return self.sql(
3338            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
3339        )
3340
    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
        """
        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
        expressions that adjust the index at query time:
          - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
          - end:   CASE WHEN end < 0 THEN end - 1 ELSE end END
        """
        start, end = expression.args.get("start"), expression.args.get("end")

        if expression.args.get("zero_based"):
            # .copy() inside the WHEN/THEN branches avoids re-using the same AST
            # node in multiple places; the ELSE branch consumes the original node
            if start is not None:
                start = (
                    exp.case()
                    .when(
                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(start)
                )
            if end is not None:
                end = (
                    exp.case()
                    .when(
                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(end)
                )

        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))
3372
    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
        """Render ARRAYS_ZIP via the ARRAYS_ZIP_TEMPLATE.

        The template receives: a NULL check over all inputs, an all-empty check,
        an empty struct with the output schema ({'$1': NULL, ...}), the maximum
        input length, and the per-index struct builder. Zero arguments degrade
        to a single-element list holding an empty MAP.
        """
        args = expression.expressions

        if not args:
            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))

        # Build placeholder values for template
        lengths = [exp.Length(this=arg) for arg in args]
        max_len = (
            lengths[0]
            if len(lengths) == 1
            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
        )

        # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
        empty_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
                for i in range(len(args))
            ],
        )

        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
        # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
        index = exp.column("__i") + 1
        transform_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(
                    this=exp.Literal.string(f"${i + 1}"),
                    expression=exp.func("COALESCE", arg, exp.array())[index],
                )
                for i, arg in enumerate(args)
            ],
        )

        result = exp.replace_placeholders(
            self.ARRAYS_ZIP_TEMPLATE.copy(),
            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
            all_empty_check=exp.and_(
                *[
                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
                    for arg in args
                ]
            ),
            empty_struct=empty_struct,
            max_len=max_len,
            transform_struct=transform_struct,
        )
        return self.sql(result)
3425
3426    def lower_sql(self, expression: exp.Lower) -> str:
3427        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
3428        return _gen_with_cast_to_blob(self, expression, result_sql)
3429
3430    def upper_sql(self, expression: exp.Upper) -> str:
3431        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
3432        return _gen_with_cast_to_blob(self, expression, result_sql)
3433
3434    def reverse_sql(self, expression: exp.Reverse) -> str:
3435        result_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
3436        return _gen_with_cast_to_blob(self, expression, result_sql)
3437
    def _left_right_sql(self, expression: exp.Left | exp.Right, func_name: str) -> str:
        """Shared lowering for LEFT/RIGHT.

        Binary inputs are processed on their HEX representation — lengths are
        doubled since each byte is two hex chars — and converted back with
        UNHEX. When `negative_length_returns_empty` is set, a CASE guard yields
        an empty string (or empty BLOB) for negative lengths.
        """
        arg = expression.this
        length = expression.expression
        is_binary = _is_binary(arg)

        if is_binary:
            # LEFT/RIGHT(blob, n) becomes UNHEX(LEFT/RIGHT(HEX(blob), n * 2))
            # Each byte becomes 2 hex chars, so multiply length by 2
            hex_arg = exp.Hex(this=arg)
            hex_length = exp.Mul(this=length, expression=exp.Literal.number(2))
            result: exp.Expression = exp.Unhex(
                this=exp.Anonymous(this=func_name, expressions=[hex_arg, hex_length])
            )
        else:
            result = exp.Anonymous(this=func_name, expressions=[arg, length])

        if expression.args.get("negative_length_returns_empty"):
            # UNHEX('') keeps the empty result typed as a BLOB
            empty: exp.Expression = exp.Literal.string("")
            if is_binary:
                empty = exp.Unhex(this=empty)
            result = exp.case().when(length < exp.Literal.number(0), empty).else_(result)

        return self.sql(result)
3461
3462    def left_sql(self, expression: exp.Left) -> str:
3463        return self._left_right_sql(expression, "LEFT")
3464
3465    def right_sql(self, expression: exp.Right) -> str:
3466        return self._left_right_sql(expression, "RIGHT")
3467
3468    def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str:
3469        return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING"))
3470
    def stuff_sql(self, expression: exp.Stuff) -> str:
        """Render STUFF/INSERT as substring splicing: left part || insertion || right part.

        Binary inputs are routed through HEX/UNHEX because DuckDB's SUBSTRING
        does not accept BLOBs; character offsets are doubled on the hex string
        since each byte occupies two hex characters.
        """
        base = expression.this
        start = expression.args["start"]
        length = expression.args["length"]
        insertion = expression.expression
        is_binary = _is_binary(base)

        if is_binary:
            # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
            # (each byte = 2 hex chars), then UNHEX back to BLOB
            base = exp.Hex(this=base)
            insertion = exp.Hex(this=insertion)
            # Keep the first (start - 1) bytes = 2 * (start - 1) hex chars
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
            )
            # Resume at byte (start + length) = hex position 2*(start+length-1) + 1
            right = exp.Substring(
                this=base.copy(),
                start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
                + exp.Literal.number(1),
            )
        else:
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=start.copy() - exp.Literal.number(1),
            )
            right = exp.Substring(this=base.copy(), start=start + length)
        result: exp.Expr = exp.DPipe(
            this=exp.DPipe(this=left, expression=insertion), expression=right
        )

        if is_binary:
            result = exp.Unhex(this=result)

        return self.sql(result)
3508
3509    def rand_sql(self, expression: exp.Rand) -> str:
3510        seed = expression.this
3511        if seed is not None:
3512            self.unsupported("RANDOM with seed is not supported in DuckDB")
3513
3514        lower = expression.args.get("lower")
3515        upper = expression.args.get("upper")
3516
3517        if lower and upper:
3518            # scale DuckDB's [0,1) to the specified range
3519            range_size = exp.paren(upper - lower)
3520            scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)
3521
3522            # For now we assume that if bounds are set, return type is BIGINT. Snowflake/Teradata
3523            result = exp.cast(scaled, exp.DType.BIGINT)
3524            return self.sql(result)
3525
3526        # Default DuckDB behavior - just return RANDOM() as float
3527        return "RANDOM()"
3528
3529    def bytelength_sql(self, expression: exp.ByteLength) -> str:
3530        arg = expression.this
3531
3532        # Check if it's a text type (handles both literals and annotated expressions)
3533        if arg.is_type(*exp.DataType.TEXT_TYPES):
3534            return self.func("OCTET_LENGTH", exp.Encode(this=arg))
3535
3536        # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated)
3537        return self.func("OCTET_LENGTH", arg)
3538
    def base64encode_sql(self, expression: exp.Base64Encode) -> str:
        """Render BASE64_ENCODE as TO_BASE64, with alphabet and line-wrapping support.

        Text inputs are wrapped in ENCODE since TO_BASE64 requires a BLOB. A
        custom alphabet is applied via character replacements; a positive
        literal max_line_length inserts a newline every N output characters
        (trailing newline trimmed).
        """
        # DuckDB TO_BASE64 requires BLOB input
        # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
        # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
        result = expression.this

        # Check if input is a string type - ENCODE only accepts VARCHAR
        if result.is_type(*exp.DataType.TEXT_TYPES):
            result = exp.Encode(this=result)

        result = exp.ToBase64(this=result)

        max_line_length = expression.args.get("max_line_length")
        alphabet = expression.args.get("alphabet")

        # Handle custom alphabet by replacing standard chars with custom ones
        result = _apply_base64_alphabet_replacements(result, alphabet)

        # Handle max_line_length by inserting newlines every N characters
        # (only honored for numeric literals; anything else disables wrapping)
        line_length = (
            t.cast(int, max_line_length.to_py())
            if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
            else 0
        )
        if line_length > 0:
            newline = exp.Chr(expressions=[exp.Literal.number(10)])
            result = exp.Trim(
                this=exp.RegexpReplace(
                    this=result,
                    expression=exp.Literal.string(f"(.{{{line_length}}})"),
                    replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
                ),
                expression=newline,
                position="TRAILING",
            )

        return self.sql(result)
3576
3577    def replace_sql(self, expression: exp.Replace) -> str:
3578        result_sql = self.func(
3579            "REPLACE",
3580            _cast_to_varchar(expression.this),
3581            _cast_to_varchar(expression.expression),
3582            _cast_to_varchar(expression.args.get("replacement")),
3583        )
3584        return _gen_with_cast_to_blob(self, expression, result_sql)
3585
3586    def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
3587        _prepare_binary_bitwise_args(expression)
3588        result_sql = self.binary(expression, op)
3589        return _gen_with_cast_to_blob(self, expression, result_sql)
3590
3591    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
3592        _prepare_binary_bitwise_args(expression)
3593        result_sql = self.func("XOR", expression.this, expression.expression)
3594        return _gen_with_cast_to_blob(self, expression, result_sql)
3595
3596    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
3597        this = expression.this
3598        key = expression.args.get("key")
3599        key_sql = key.name if isinstance(key, exp.Expr) else ""
3600        value_sql = self.sql(expression, "value")
3601
3602        kv_sql = f"{key_sql} := {value_sql}"
3603
3604        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
3605        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
3606        if isinstance(this, exp.Struct) and not this.expressions:
3607            return self.func("STRUCT_PACK", kv_sql)
3608
3609        return self.func("STRUCT_INSERT", this, kv_sql)
3610
3611    def mapcat_sql(self, expression: exp.MapCat) -> str:
3612        result = exp.replace_placeholders(
3613            self.MAPCAT_TEMPLATE.copy(),
3614            map1=expression.this,
3615            map2=expression.expression,
3616        )
3617        return self.sql(result)
3618
3619    def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str:
3620        return self.func(
3621            "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this
3622        )
3623
3624    def mapdelete_sql(self, expression: exp.MapDelete) -> str:
3625        map_arg = expression.this
3626        keys_to_delete = expression.expressions
3627
3628        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3629
3630        lambda_expr = exp.Lambda(
3631            this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
3632            expressions=[exp.to_identifier("x")],
3633        )
3634        result = exp.func(
3635            "MAP_FROM_ENTRIES",
3636            exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
3637        )
3638        return self.sql(result)
3639
3640    def mappick_sql(self, expression: exp.MapPick) -> str:
3641        map_arg = expression.this
3642        keys_to_pick = expression.expressions
3643
3644        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3645
3646        if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
3647            lambda_expr = exp.Lambda(
3648                this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
3649                expressions=[exp.to_identifier("x")],
3650            )
3651        else:
3652            lambda_expr = exp.Lambda(
3653                this=exp.In(this=x_dot_key, expressions=keys_to_pick),
3654                expressions=[exp.to_identifier("x")],
3655            )
3656
3657        result = exp.func(
3658            "MAP_FROM_ENTRIES",
3659            exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
3660        )
3661        return self.sql(result)
3662
3663    def mapsize_sql(self, expression: exp.MapSize) -> str:
3664        return self.func("CARDINALITY", expression.this)
3665
3666    @unsupported_args("update_flag")
3667    def mapinsert_sql(self, expression: exp.MapInsert) -> str:
3668        map_arg = expression.this
3669        key = expression.args.get("key")
3670        value = expression.args.get("value")
3671
3672        map_type = map_arg.type
3673
3674        if value is not None:
3675            if map_type and map_type.expressions and len(map_type.expressions) > 1:
3676                # Extract the value type from MAP(key_type, value_type)
3677                value_type = map_type.expressions[1]
3678                # Cast value to match the map's value type to avoid type conflicts
3679                value = exp.cast(value, value_type)
3680            # else: polymorphic MAP case - no type parameters available, use value as-is
3681
3682        # Create a single-entry map for the new key-value pair
3683        new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
3684        new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)
3685
3686        # Use MAP_CONCAT to merge the original map with the new entry
3687        # This automatically handles both insert and update cases
3688        result = exp.func("MAP_CONCAT", map_arg, new_entry)
3689
3690        return self.sql(result)
3691
3692    def startswith_sql(self, expression: exp.StartsWith) -> str:
3693        return self.func(
3694            "STARTS_WITH",
3695            _cast_to_varchar(expression.this),
3696            _cast_to_varchar(expression.expression),
3697        )
3698
3699    def space_sql(self, expression: exp.Space) -> str:
3700        # DuckDB's REPEAT requires BIGINT for the count parameter
3701        return self.sql(
3702            exp.Repeat(
3703                this=exp.Literal.string(" "),
3704                times=exp.cast(expression.this, exp.DType.BIGINT),
3705            )
3706        )
3707
3708    def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
3709        # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
3710        if isinstance(expression.this, exp.Generator):
3711            # Preserve alias, joins, and other table-level args
3712            table = exp.Table(
3713                this=expression.this,
3714                alias=expression.args.get("alias"),
3715                joins=expression.args.get("joins"),
3716            )
3717            return self.sql(table)
3718
3719        return super().tablefromrows_sql(expression)
3720
    def unnest_sql(self, expression: exp.Unnest) -> str:
        # "explode_array" marks BigQuery-style UNNEST over a nested array.
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                # Detach the alias from the UNNEST node; it will be re-attached
                # to the wrapping subquery below. Only the first column alias
                # survives as the subquery's table alias.
                expression.set("alias", None)
                if alias.columns:
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            # Wrap the rendered UNNEST in "(SELECT ...)" so it is valid in a
            # FROM clause.
            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
3742
    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        this = expression.this

        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        # FIRST(...) IGNORE NULLS is approximated via ANY_VALUE.
        if isinstance(this, exp.First):
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        # Drop the IGNORE NULLS modifier and render the inner function alone.
        return self.sql(this)
3758
3759    def split_sql(self, expression: exp.Split) -> str:
3760        base_func = exp.func("STR_SPLIT", expression.this, expression.expression)
3761
3762        case_expr = exp.case().else_(base_func)
3763        needs_case = False
3764
3765        if expression.args.get("null_returns_null"):
3766            case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
3767            needs_case = True
3768
3769        if expression.args.get("empty_delimiter_returns_whole"):
3770            # When delimiter is empty string, return input string as single array element
3771            array_with_input = exp.array(expression.this)
3772            case_expr = case_expr.when(
3773                expression.expression.eq(exp.Literal.string("")), array_with_input
3774            )
3775            needs_case = True
3776
3777        return self.sql(case_expr if needs_case else base_func)
3778
    def splitpart_sql(self, expression: exp.SplitPart) -> str:
        # Translate SPLIT_PART, emulating source-dialect quirks (Snowflake's
        # part-index 0 handling and empty-delimiter semantics) via CASE wrappers.
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Handle Snowflake's "index 0 and 1 both return first element" behavior
            if expression.args.get("part_index_zero_as_one"):
                # Convert 0 to 1 for compatibility

                part_index_arg = exp.Paren(
                    this=exp.case()
                    .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
                    .else_(part_index_arg)
                )

            # Use Anonymous to avoid recursion
            base_func_expr: exp.Expr = exp.Anonymous(
                this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
            )
            needs_case_transform = False
            case_expr = exp.case().else_(base_func_expr)

            if expression.args.get("empty_delimiter_returns_whole"):
                # When delimiter is empty string:
                # - Return whole string if part_index is 1 or -1
                # - Return empty string otherwise
                empty_case = exp.Paren(
                    this=exp.case()
                    .when(
                        exp.or_(
                            part_index_arg.eq(exp.Literal.number("1")),
                            part_index_arg.eq(exp.Literal.number("-1")),
                        ),
                        string_arg,
                    )
                    .else_(exp.Literal.string(""))
                )

                case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
                needs_case_transform = True

            """
            Output looks something like this:

            CASE
            WHEN delimiter is '' THEN
                (
                    CASE
                    WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
                    ELSE '' END
                )
            ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
            END

            """
            return self.sql(case_expr if needs_case_transform else base_func_expr)

        # Without both a delimiter and a part index there is nothing to
        # emulate; render the function call generically.
        return self.function_fallback_sql(expression)
3838
3839    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
3840        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
3841            # DuckDB should render RESPECT NULLS only for the general-purpose
3842            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
3843            return super().respectnulls_sql(expression)
3844
3845        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
3846        return self.sql(expression, "this")
3847
    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        # ARRAY_TO_STRING with dialect-specific NULL-handling flags:
        # - "null_is_empty": NULL elements become '' instead of being skipped
        # - "null": replacement value for NULL elements
        null = expression.args.get("null")

        if expression.args.get("null_is_empty"):
            # Map each element through COALESCE(CAST(x AS TEXT), '') so NULL
            # elements contribute an empty string.
            x = exp.to_identifier("x")
            list_transform = exp.Transform(
                this=expression.this.copy(),
                expression=exp.Lambda(
                    this=exp.Coalesce(
                        this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                    ),
                    expressions=[x],
                ),
            )
            array_to_string = exp.ArrayToString(
                this=list_transform, expression=expression.expression
            )
            if expression.args.get("null_delim_is_null"):
                # A NULL delimiter must yield NULL overall; guard with CASE.
                return self.sql(
                    exp.case()
                    .when(expression.expression.copy().is_(exp.null()), exp.null())
                    .else_(array_to_string)
                )
            return self.sql(array_to_string)

        if null:
            # Substitute NULL elements with the provided replacement via
            # LIST_TRANSFORM(arr, x -> COALESCE(x, replacement)).
            x = exp.to_identifier("x")
            return self.sql(
                exp.ArrayToString(
                    this=exp.Transform(
                        this=expression.this,
                        expression=exp.Lambda(
                            this=exp.Coalesce(this=x, expressions=[null]),
                            expressions=[x],
                        ),
                    ),
                    expression=expression.expression,
                )
            )

        return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
3889
3890    def concatws_sql(self, expression: exp.ConcatWs) -> str:
3891        # DuckDB-specific: handle binary types using DPipe (||) operator
3892        separator = seq_get(expression.expressions, 0)
3893        args = expression.expressions[1:]
3894
3895        if any(_is_binary(arg) for arg in [separator, *args]):
3896            result = args[0]
3897            for arg in args[1:]:
3898                result = exp.DPipe(
3899                    this=exp.DPipe(this=result, expression=separator), expression=arg
3900                )
3901            return self.sql(result)
3902
3903        return super().concatws_sql(expression)
3904
    def _regexp_extract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
        # Shared implementation for REGEXP_EXTRACT / REGEXP_EXTRACT_ALL.
        # Emulates source-dialect extras (position, occurrence, flags) on top
        # of DuckDB's two-to-four argument REGEXP_EXTRACT[_ALL].
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # Handle Snowflake's 'e' flag: it enables capture group extraction
        # In DuckDB, this is controlled by the group parameter directly
        if params and params.is_string and "e" in params.name:
            params = exp.Literal.string(params.name.replace("e", ""))

        validated_flags = self._validate_regexp_flags(params, supported_flags="cims")

        # Strip default group when no following params (DuckDB default is same as group=0)
        if (
            not validated_flags
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        flags_expr = exp.Literal.string(validated_flags) if validated_flags else None

        # use substring to handle position argument
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            # A start position past the end makes SUBSTRING return '', which
            # NULLIF turns into NULL when the dialect expects that.
            if null_if_pos_overflow:
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        is_extract_all = isinstance(expression, exp.RegexpExtractAll)
        non_single_occurrence = occurrence and (not occurrence.is_int or occurrence.to_py() > 1)

        # An occurrence other than 1 forces the _ALL variant so the n-th match
        # can be selected out of the result list below.
        if is_extract_all or non_single_occurrence:
            name = "REGEXP_EXTRACT_ALL"
        else:
            name = "REGEXP_EXTRACT"

        result: exp.Expr = exp.Anonymous(
            this=name, expressions=[this, expression.expression, group, flags_expr]
        )

        # Array slicing for REGEXP_EXTRACT_ALL with occurrence
        if is_extract_all and non_single_occurrence:
            result = exp.Bracket(this=result, expressions=[exp.Slice(this=occurrence)])
        # ARRAY_EXTRACT for REGEXP_EXTRACT with occurrence > 1
        elif non_single_occurrence:
            result = exp.Anonymous(this="ARRAY_EXTRACT", expressions=[result, occurrence])

        return self.sql(result)
3957
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Shared implementation with REGEXP_EXTRACT_ALL; see _regexp_extract_sql.
        return self._regexp_extract_sql(expression)
3960
    def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str:
        # Shared implementation with REGEXP_EXTRACT; see _regexp_extract_sql.
        return self._regexp_extract_sql(expression)
3963
    def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
        """Emulate REGEXP_INSTR, which DuckDB lacks.

        The match position is reconstructed arithmetically: 1 plus the total
        length of the text before the n-th match (the first n split segments
        plus the first n-1 matches), plus any starting-position offset. A CASE
        wrapper reproduces NULL propagation, the empty-pattern result (0) and
        the "fewer matches than occurrence" result (0).
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        orig_occ = expression.args.get("occurrence")
        occurrence = orig_occ or exp.Literal.number(1)
        option = expression.args.get("option")
        parameters = expression.args.get("parameters")

        # Fold supported flags into the pattern as an inline (?ims) group.
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle starting position offset
        pos_offset: exp.Expr = exp.Literal.number(0)
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)
            pos_offset = position - exp.Literal.number(1)

        # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
        def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
            lst = exp.Bracket(
                this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
                expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
                offset=1,
            )
            transform = exp.Anonymous(
                this="LIST_TRANSFORM",
                expressions=[
                    lst,
                    exp.Lambda(
                        this=exp.Length(this=exp.to_identifier("x")),
                        expressions=[exp.to_identifier("x")],
                    ),
                ],
            )
            # COALESCE(..., 0) covers the empty-list case where LIST_SUM is NULL.
            return exp.Coalesce(
                this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
                expressions=[exp.Literal.number(0)],
            )

        # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
        base_pos: exp.Expr = (
            exp.Literal.number(1)
            + sum_lengths("STRING_SPLIT_REGEX", occurrence)
            + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
            + pos_offset
        )

        # option=1: add match length for end position
        if option and option.is_int and option.to_py() == 1:
            match_at_occ = exp.Bracket(
                this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
                expressions=[occurrence],
                offset=1,
            )
            base_pos = base_pos + exp.Coalesce(
                this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
            )

        # NULL checks for all provided arguments
        # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
        null_args = [
            expression.this,
            expression.expression,
            position,
            orig_occ,
            option,
            parameters,
        ]
        null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

        matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

        return self.sql(
            exp.case()
            .when(exp.or_(*null_checks), exp.Null())
            .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
            .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
            .else_(base_pos)
        )
4045
4046    @unsupported_args("culture")
4047    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
4048        fmt = expression.args.get("format")
4049        if fmt and fmt.is_int:
4050            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)
4051
4052        self.unsupported("Only integer formats are supported by NumberToStr")
4053        return self.function_fallback_sql(expression)
4054
    def autoincrementcolumnconstraint_sql(self, _) -> str:
        # DuckDB has no AUTOINCREMENT; warn and omit the constraint entirely.
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""
4058
4059    def aliases_sql(self, expression: exp.Aliases) -> str:
4060        this = expression.this
4061        if isinstance(this, exp.Posexplode):
4062            return self.posexplode_sql(this)
4063
4064        return super().aliases_sql(expression)
4065
    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        # Translate Spark's POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS,
        # extracting any user-provided aliases from the surrounding node.
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                # Remove the table alias; its columns become the projection aliases.
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql
4103
    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        # Text input is first coerced to TIMESTAMP so date arithmetic applies.
        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DType.FLOAT,
                exp.DType.DOUBLE,
                exp.DType.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)
4162
4163    def format_sql(self, expression: exp.Format) -> str:
4164        if expression.name.lower() == "%s" and len(expression.expressions) == 1:
4165            return self.func("FORMAT", "'{}'", expression.expressions[0])
4166
4167        return self.function_fallback_sql(expression)
4168
    def hexstring_sql(
        self, expression: exp.HexString, binary_function_repr: str | None = None
    ) -> str:
        # The caller-provided wrapper is deliberately ignored:
        # UNHEX('FF') correctly produces blob \xFF in DuckDB
        return super().hexstring_sql(expression, binary_function_repr="UNHEX")
4174
    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        unit = expression.args.get("unit")
        date = expression.this

        # A week unit may carry a dialect-specific start-of-week day; if so,
        # a dedicated week-truncation expression is built instead of DATE_TRUNC.
        week_start = _week_unit_to_dow(unit)
        # NOTE: `unit` is rebound here from the raw arg to its string literal form.
        unit = unit_to_str(expression)

        if week_start:
            result = self.sql(
                _build_week_trunc_expression(date, week_start, preserve_start_day=True)
            )
        else:
            result = self.func("DATE_TRUNC", unit, date)

        # Cast back to the input's type when the source dialect preserves it,
        # except when truncating a DATE by a date unit (already the right type).
        if (
            expression.args.get("input_type_preserved")
            and date.is_type(*exp.DataType.TEMPORAL_TYPES)
            and not (is_date_unit(unit) and date.is_type(exp.DType.DATE))
        ):
            return self.sql(exp.Cast(this=result, to=date.type))

        return result
4197
    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this
        date_unit = is_date_unit(unit)

        if date_unit and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ):
                # DATE_TRUNC doesn't accept TIME inputs: attach the time to a
                # dummy date, truncate, then cast back to the original type.
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DType.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            # Cast back to the input type unless a DATE is truncated by a
            # date unit (the result is already a DATE).
            if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
                date_unit and timestamp.is_type(exp.DType.DATE)
            ):
                return self.sql(exp.Cast(this=result, to=timestamp.type))

        return result
4230
4231    def trim_sql(self, expression: exp.Trim) -> str:
4232        expression.this.replace(_cast_to_varchar(expression.this))
4233        if expression.expression:
4234            expression.expression.replace(_cast_to_varchar(expression.expression))
4235
4236        result_sql = super().trim_sql(expression)
4237        return _gen_with_cast_to_blob(self, expression, result_sql)
4238
4239    def round_sql(self, expression: exp.Round) -> str:
4240        this = expression.this
4241        decimals = expression.args.get("decimals")
4242        truncate = expression.args.get("truncate")
4243
4244        # DuckDB requires the scale (decimals) argument to be an INT
4245        # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
4246        if decimals is not None and expression.args.get("casts_non_integer_decimals"):
4247            if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
4248                decimals = exp.cast(decimals, exp.DType.INT)
4249
4250        func = "ROUND"
4251        if truncate:
4252            # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
4253            if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
4254                func = "ROUND_EVEN"
4255                truncate = None
4256            # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
4257            elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
4258                truncate = None
4259
4260        return self.func(func, this, decimals, truncate)
4261
    def strtok_sql(self, expression: exp.Strtok) -> str:
        # Emulate STRTOK: split on ANY character of the delimiter set, drop
        # empty tokens, and index into the result (1-based).
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Escape regex chars and build character class at runtime using REGEXP_REPLACE
            escaped_delimiter = exp.Anonymous(
                this="REGEXP_REPLACE",
                expressions=[
                    delimiter_arg,
                    exp.Literal.string(
                        r"([\[\]^.\-*+?(){}|$\\])"
                    ),  # Escape problematic regex chars
                    exp.Literal.string(
                        r"\\\1"
                    ),  # Replace with escaped version using $1 backreference
                    exp.Literal.string("g"),  # Global flag
                ],
            )
            # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped_delimiter, ']') END
            regex_pattern = (
                exp.case()
                .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string(""))
                .else_(
                    exp.func(
                        "CONCAT",
                        exp.Literal.string("["),
                        escaped_delimiter,
                        exp.Literal.string("]"),
                    )
                )
            )

            # STRTOK skips empty strings, so we need to filter them out
            # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
            split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern)
            x = exp.to_identifier("x")
            is_empty = x.eq(exp.Literal.string(""))
            filtered_array = exp.func(
                "LIST_FILTER",
                split_array,
                exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]),
            )
            base_func = exp.Bracket(
                this=filtered_array,
                expressions=[part_index_arg],
                offset=1,
            )

            # Use template with the built regex pattern
            result = exp.replace_placeholders(
                self.STRTOK_TEMPLATE.copy(),
                string=string_arg,
                delimiter=delimiter_arg,
                part_index=part_index_arg,
                base_func=base_func,
            )

            return self.sql(result)

        # Without both delimiter and part index, render the call generically.
        return self.function_fallback_sql(expression)
4324
4325    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
4326        result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))
4327
4328        # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type
4329        if expression.is_type(*exp.DataType.REAL_TYPES):
4330            result = f"CAST({result} AS DOUBLE)"
4331
4332        return result
4333
4334    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
4335        """
4336        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
4337        dividing the input distribution into n equal-sized buckets.
4338
4339        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
4340        does not document the specific algorithm used so results may differ. DuckDB does not
4341        support RESPECT NULLS.
4342        """
4343        this = expression.this
4344        if isinstance(this, exp.Distinct):
4345            # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
4346            if len(this.expressions) < 2:
4347                self.unsupported("APPROX_QUANTILES requires a bucket count argument")
4348                return self.function_fallback_sql(expression)
4349            num_quantiles_expr = this.expressions[1].pop()
4350        else:
4351            num_quantiles_expr = expression.expression
4352
4353        if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
4354            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
4355            return self.function_fallback_sql(expression)
4356
4357        num_quantiles = t.cast(int, num_quantiles_expr.to_py())
4358        if num_quantiles <= 0:
4359            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
4360            return self.function_fallback_sql(expression)
4361
4362        quantiles = [
4363            exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
4364            for i in range(num_quantiles + 1)
4365        ]
4366
4367        return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)))
4368
4369    def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
4370        if expression.args.get("scalar_only"):
4371            expression = exp.JSONExtractScalar(
4372                this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
4373            )
4374        return _arrow_json_extract_sql(self, expression)
4375
4376    def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
4377        this = expression.this
4378
4379        if _is_binary(this):
4380            expression.type = exp.DType.BINARY.into_expr()
4381
4382        arg = _cast_to_bit(this)
4383
4384        if isinstance(this, exp.Neg):
4385            arg = exp.Paren(this=arg)
4386
4387        expression.set("this", arg)
4388
4389        result_sql = f"~{self.sql(expression, 'this')}"
4390
4391        return _gen_with_cast_to_blob(self, expression, result_sql)
4392
4393    def window_sql(self, expression: exp.Window) -> str:
4394        this = expression.this
4395        if isinstance(this, exp.Corr) or (
4396            isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr)
4397        ):
4398            return self._corr_sql(expression)
4399
4400        return super().window_sql(expression)
4401
4402    def filter_sql(self, expression: exp.Filter) -> str:
4403        if isinstance(expression.this, exp.Corr):
4404            return self._corr_sql(expression)
4405
4406        return super().filter_sql(expression)
4407
4408    def _corr_sql(
4409        self,
4410        expression: exp.Filter | exp.Window | exp.Corr,
4411    ) -> str:
4412        if isinstance(expression, exp.Corr) and not expression.args.get("null_on_zero_variance"):
4413            return self.func("CORR", expression.this, expression.expression)
4414
4415        corr_expr = _maybe_corr_null_to_false(expression)
4416        if corr_expr is None:
4417            if isinstance(expression, exp.Window):
4418                return super().window_sql(expression)
4419            if isinstance(expression, exp.Filter):
4420                return super().filter_sql(expression)
4421            corr_expr = expression  # make mypy happy
4422
4423        return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether to normalize identifiers to lowercase. Default: False.
  • pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
  • indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
  • normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether to preserve comments in the output SQL code. Default: True
PARAMETER_TOKEN = '$'
NAMED_PLACEHOLDER_TOKEN = '$'
JOIN_HINTS = False
TABLE_HINTS = False
QUERY_HINTS = False
LIMIT_FETCH = 'LIMIT'
STRUCT_DELIMITER = ('(', ')')
RENAME_TABLE_WITH_DB = False
NVL2_SUPPORTED = False
SEMI_ANTI_JOIN_WITH_SIDE = False
TABLESAMPLE_KEYWORDS = 'USING SAMPLE'
TABLESAMPLE_SEED_KEYWORD = 'REPEATABLE'
LAST_DAY_SUPPORTS_DATE_PART = False
JSON_KEY_VALUE_PAIR_SEP = ','
IGNORE_NULLS_IN_FUNC = True
IGNORE_NULLS_BEFORE_ORDER = False
JSON_PATH_BRACKETED_KEY_SUPPORTED = False
SUPPORTS_CREATE_TABLE_LIKE = False
MULTI_ARG_DISTINCT = False
CAN_IMPLEMENT_ARRAY_ANY = True
SUPPORTS_TO_NUMBER = False
SELECT_KINDS: tuple[str, ...] = ()
SUPPORTS_DECODE_CASE = False
SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False
AFTER_HAVING_MODIFIER_TRANSFORMS = {'windows': <function <lambda>>, 'qualify': <function <lambda>>}
SUPPORTS_WINDOW_EXCLUDE = True
COPY_HAS_INTO_KEYWORD = False
STAR_EXCEPT = 'EXCLUDE'
PAD_FILL_PATTERN_IS_REQUIRED = True
ARRAY_SIZE_DIM_REQUIRED: bool | None = False
NORMALIZE_EXTRACT_DATE_PARTS = True
SUPPORTS_LIKE_QUANTIFIERS = False
SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
TRANSFORMS = {<class 'sqlglot.expressions.query.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.query.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.query.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.query.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.core.Adjacent'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.AllowedValuesProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.AnalyzeColumns'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.AnalyzeWith'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.ArrayContainsAll'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.ArrayOverlaps'>: <function _array_overlaps_sql>, <class 'sqlglot.expressions.constraints.AssumeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.BackupProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.math.Ceil'>: <function _ceil_floor>, <class 'sqlglot.expressions.constraints.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.CommentColumnConstraint'>: <function no_comment_column_constraint_sql>, <class 'sqlglot.expressions.functions.ConnectByRoot'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.string.ConvertToCharset'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CopyGrantsProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CredentialsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.CurrentCatalog'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.SessionUser'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ApiProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ApplicationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CatalogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ComputeProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.DatabaseProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.DynamicProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.EmptyProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.EndStatement'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.EnviromentProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.HandlerProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ParameterStyleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.EphemeralColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ExcludeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Except'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ExternalProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.math.Floor'>: <function _ceil_floor>, <class 'sqlglot.expressions.query.Get'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.GlobalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.HybridProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.IcebergProperty'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.properties.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Intersect'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.datatypes.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.Int64'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONBContainsAnyTopKeys'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONBContainsAllTopKeys'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONBDeleteAtPath'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONObject'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONObjectAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.properties.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.MaskingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.NetFunc'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.properties.NetworkProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.NonClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.NotForReplicationColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.OnProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.Operator'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.ExtendsLeft'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.ExtendsRight'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.PartitionedByBucket'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.PartitionByTruncate'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.PivotAny'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.PositionalColumn'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ProjectionPolicyColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.InvisibleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ZeroFillColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Put'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.properties.ReturnsProperty'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.properties.RowAccessProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.SafeFunc'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SecureProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SecurityIntegrationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SharingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Stream'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.StreamingTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.StrictProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ddl.SwapTable'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.TableColumn'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.Tags'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.ToMap'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ToTableProperty'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.properties.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.VirtualProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ddl.TriggerExecute'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Union'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.UnloggedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.UsingTemplateProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.UsingData'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.temporal.UtcDate'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.temporal.UtcTime'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.temporal.UtcTimestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Variadic'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.VarMap'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ViewAttributeProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.WithProcedureOptions'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.WithSchemaBindingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.WithOperator'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ForceProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.aggregate.AnyValue'>: <function _anyvalue_sql>, <class 'sqlglot.expressions.core.ApproxDistinct'>: <function 
approx_count_distinct_sql>, <class 'sqlglot.expressions.math.Boolnot'>: <function _boolnot_sql>, <class 'sqlglot.expressions.math.Booland'>: <function _booland_sql>, <class 'sqlglot.expressions.math.Boolor'>: <function _boolor_sql>, <class 'sqlglot.expressions.array.Array'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.array.ArrayAppend'>: <function array_append_sql.<locals>._array_append_sql>, <class 'sqlglot.expressions.array.ArrayCompact'>: <function array_compact_sql>, <class 'sqlglot.expressions.array.ArrayConstructCompact'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.ArrayConcat'>: <function array_concat_sql.<locals>._array_concat_sql>, <class 'sqlglot.expressions.array.ArrayContains'>: <function _array_contains_sql>, <class 'sqlglot.expressions.array.ArrayFilter'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.array.ArrayInsert'>: <function _array_insert_sql>, <class 'sqlglot.expressions.array.ArrayPosition'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.ArrayRemoveAt'>: <function _array_remove_at_sql>, <class 'sqlglot.expressions.array.ArrayRemove'>: <function remove_from_array_using_filter>, <class 'sqlglot.expressions.array.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.array.ArrayPrepend'>: <function array_append_sql.<locals>._array_append_sql>, <class 'sqlglot.expressions.array.ArraySum'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.array.ArrayMax'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.array.ArrayMin'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.string.Base64DecodeBinary'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Base64DecodeString'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.core.BitwiseAnd'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.BitwiseAndAgg'>: <function 
_bitwise_agg_sql>, <class 'sqlglot.expressions.math.BitwiseCount'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.core.BitwiseLeftShift'>: <function _bitshift_sql>, <class 'sqlglot.expressions.core.BitwiseOr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.BitwiseOrAgg'>: <function _bitwise_agg_sql>, <class 'sqlglot.expressions.core.BitwiseRightShift'>: <function _bitshift_sql>, <class 'sqlglot.expressions.math.BitwiseXorAgg'>: <function _bitwise_agg_sql>, <class 'sqlglot.expressions.aggregate.Corr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.CosineDistance'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.CurrentTime'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.CurrentSchemas'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.CurrentTimestamp'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.CurrentVersion'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.Localtime'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfWeek'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfWeekIso'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfYear'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.Dayname'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.Monthname'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.datatypes.DataType'>: <function _datatype_sql>, <class 'sqlglot.expressions.temporal.Date'>: <function _date_sql>, <class 'sqlglot.expressions.temporal.DateAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 
'sqlglot.expressions.temporal.DateFromParts'>: <function _date_from_parts_sql>, <class 'sqlglot.expressions.temporal.DateSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.temporal.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.temporal.Datetime'>: <function no_datetime_sql>, <class 'sqlglot.expressions.temporal.DatetimeDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.temporal.DatetimeSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.DatetimeAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.DateToDi'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Decode'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.DiToDate'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Encode'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.EqualNull'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.EuclideanDistance'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.GenerateDateArray'>: <function _generate_datetime_array_sql>, <class 'sqlglot.expressions.array.GenerateSeries'>: <function generate_series_sql.<locals>._generate_series_sql>, <class 'sqlglot.expressions.temporal.GenerateTimestampArray'>: <function _generate_datetime_array_sql>, <class 'sqlglot.expressions.math.Getbit'>: <function getbit_sql>, <class 'sqlglot.expressions.aggregate.GroupConcat'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.Explode'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.core.IntDiv'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.IsInf'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.math.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.functions.IsNullValue'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.IsArray'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.json.JSONBExists'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.json.JSONExtract'>: <function _arrow_json_extract_sql>, <class 'sqlglot.expressions.json.JSONExtractArray'>: <function _json_extract_value_array_sql>, <class 'sqlglot.expressions.json.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.query.JSONValueArray'>: <function _json_extract_value_array_sql>, <class 'sqlglot.expressions.query.Lateral'>: <function _explode_to_unnest_sql>, <class 'sqlglot.expressions.aggregate.LogicalOr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.aggregate.LogicalAnd'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.query.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.functions.Seq1'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.Seq2'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.Seq4'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.Seq8'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.BoolxorAgg'>: <function _boolxor_agg_sql>, <class 'sqlglot.expressions.temporal.MakeInterval'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Initcap'>: <function _initcap_sql>, <class 'sqlglot.expressions.string.MD5Digest'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.SHA'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.SHA1Digest'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.SHA2'>: <function DuckDBGenerator.<lambda>>, <class 
'sqlglot.expressions.string.SHA2Digest'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.MonthsBetween'>: <function months_between_sql>, <class 'sqlglot.expressions.temporal.NextDay'>: <function _day_navigation_sql>, <class 'sqlglot.expressions.aggregate.PercentileCont'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.aggregate.PercentileDisc'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.query.Pivot'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.temporal.PreviousDay'>: <function _day_navigation_sql>, <class 'sqlglot.expressions.string.RegexpILike'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.aggregate.RegrValx'>: <function _regr_val_sql>, <class 'sqlglot.expressions.aggregate.RegrValy'>: <function _regr_val_sql>, <class 'sqlglot.expressions.query.Return'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.StrToUnix'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.Struct'>: <function _struct_sql>, <class 'sqlglot.expressions.array.Transform'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.TimeAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TimeSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.Time'>: <function no_time_sql>, <class 'sqlglot.expressions.temporal.TimeDiff'>: <function _timediff_sql>, <class 'sqlglot.expressions.temporal.Timestamp'>: <function no_timestamp_sql>, <class 'sqlglot.expressions.temporal.TimestampAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TimestampDiff'>: <function DuckDBGenerator.<lambda>>, <class 
'sqlglot.expressions.temporal.TimestampSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TimeStrToDate'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.temporal.TimeStrToUnix'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimeToStr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.ToBoolean'>: <function _to_boolean_sql>, <class 'sqlglot.expressions.functions.ToVariant'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.TsOrDiToDi'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TsOrDsAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TsOrDsDiff'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixMicros'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixMillis'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixSeconds'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixToStr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.DatetimeTrunc'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixToTime'>: <function _unix_to_time_sql>, <class 'sqlglot.expressions.temporal.UnixToTimeStr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.aggregate.VariancePop'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.WeekOfYear'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.YearOfWeek'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.YearOfWeekIso'>: <function 
DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.core.Xor'>: <function _xor_sql>, <class 'sqlglot.expressions.json.JSONBObjectAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DateBin'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.LastDay'>: <function _last_day_sql>}
TYPE_MAPPING = {<DType.DATETIME2: 'DATETIME2'>: 'TIMESTAMP', <DType.NCHAR: 'NCHAR'>: 'TEXT', <DType.NVARCHAR: 'NVARCHAR'>: 'TEXT', <DType.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <DType.LONGTEXT: 'LONGTEXT'>: 'TEXT', <DType.TINYTEXT: 'TINYTEXT'>: 'TEXT', <DType.BLOB: 'BLOB'>: 'VARBINARY', <DType.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <DType.LONGBLOB: 'LONGBLOB'>: 'BLOB', <DType.TINYBLOB: 'TINYBLOB'>: 'BLOB', <DType.INET: 'INET'>: 'INET', <DType.ROWVERSION: 'ROWVERSION'>: 'BLOB', <DType.SMALLDATETIME: 'SMALLDATETIME'>: 'TIMESTAMP', <DType.BINARY: 'BINARY'>: 'BLOB', <DType.BPCHAR: 'BPCHAR'>: 'TEXT', <DType.CHAR: 'CHAR'>: 'TEXT', <DType.DATETIME: 'DATETIME'>: 'TIMESTAMP', <DType.DECFLOAT: 'DECFLOAT'>: 'DECIMAL(38, 5)', <DType.FLOAT: 'FLOAT'>: 'REAL', <DType.JSONB: 'JSONB'>: 'JSON', <DType.UINT: 'UINT'>: 'UINTEGER', <DType.VARBINARY: 'VARBINARY'>: 'BLOB', <DType.VARCHAR: 'VARCHAR'>: 'TEXT', <DType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>: 'TIMESTAMPTZ', <DType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>: 'TIMESTAMP', <DType.TIMESTAMP_S: 'TIMESTAMP_S'>: 'TIMESTAMP_S', <DType.TIMESTAMP_MS: 'TIMESTAMP_MS'>: 'TIMESTAMP_MS', <DType.TIMESTAMP_NS: 'TIMESTAMP_NS'>: 'TIMESTAMP_NS', <DType.BIGDECIMAL: 'BIGDECIMAL'>: 'DECIMAL(38, 5)'}
RESERVED_KEYWORDS = {'lateral_p', 'deferrable', 'current_date', 'collate', 'select', 'asc_p', 'else', 'as', 'not', 'asymmetric', 'cast', 'table', 'or', 'current_time', 'where', 'primary', 'all', 'false_p', 'union', 'localtime', 'desc_p', 'variadic', 'when', 'analyze', 'foreign', 'localtimestamp', 'some', 'current_catalog', 'symmetric', 'from', 'analyse', 'fetch', 'both', 'on', 'to', 'true_p', 'session_user', 'check_p', 'group_p', 'only', 'window', 'user', 'in_p', 'offset', 'current_timestamp', 'initially', 'with', 'references', 'end_p', 'create_p', 'placing', 'default', 'constraint', 'into', 'array', 'order', 'for', 'leading', 'do', 'having', 'intersect', 'returning', 'current_user', 'any', 'current_role', 'and', 'case', 'null_p', 'using', 'column', 'limit', 'unique', 'trailing', 'grant', 'except', 'then', 'distinct'}
UNWRAPPED_INTERVAL_VALUES = (<class 'sqlglot.expressions.core.Literal'>, <class 'sqlglot.expressions.core.Paren'>)
PROPERTIES_LOCATION = {<class 'sqlglot.expressions.properties.AllowedValuesProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.AlgorithmProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ApiProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ApplicationProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.AutoIncrementProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.AutoRefreshProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.BackupProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.BlockCompressionProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CatalogProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CharacterSetProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ChecksumProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CollateProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ComputeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CopyGrantsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.query.Cluster'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ClusteredByProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DistributedByProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DuplicateKeyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.DataBlocksizeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DatabaseProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DataDeletionProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DefinerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DictRange'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DictProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DynamicProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DistKeyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DistStyleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EmptyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EncodeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EngineProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EnviromentProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.HandlerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ParameterStyleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ExecuteAsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ExternalProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.FallbackProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.FileFormatProperty'>: 
<PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.FreespaceProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.GlobalProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.HeapProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.HybridProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.InheritsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.IcebergProperty'>: <PropertiesLocation.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.properties.IncludeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.InputModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.IsolatedLoadingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.JournalProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LanguageProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LikeProperty'>: <PropertiesLocation.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.properties.LocationProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LockProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LockingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LogProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.MaskingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.MaterializedProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.MergeBlockRatioProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ModuleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.NetworkProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.NoPrimaryIndexProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.OnProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.OnCommitProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.query.Order'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.OutputModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.PartitionedByProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.PartitionedOfProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.constraints.PrimaryKey'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.Property'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RefreshTriggerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RemoteWithConnectionModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ReturnsProperty'>: <PropertiesLocation.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.properties.RollupProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RowAccessProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RowFormatProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.RowFormatDelimitedProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RowFormatSerdeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SampleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SchemaCommentProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SecureProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SecurityIntegrationProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SerdeProperties'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.ddl.Set'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SettingsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SetProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SetConfigProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SharingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.ddl.SequenceProperties'>: <PropertiesLocation.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.ddl.TriggerProperties'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SortKeyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SqlReadWriteProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SqlSecurityProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.StabilityProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.StorageHandlerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.StreamingTableProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.StrictProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.Tags'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.TemporaryProperty'>: <PropertiesLocation.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.properties.ToTableProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.TransientProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.TransformModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.ddl.MergeTreeTTL'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.UnloggedProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.UsingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.UsingTemplateProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ViewAttributeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.VirtualProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.VolatileProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithDataProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithJournalTableProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithProcedureOptions'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.WithSchemaBindingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithSystemVersioningProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ForceProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>}
ZIPF_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Min( this=Column( this=Identifier(this=i, quoted=False)))], from_=From( this=Table( this=Identifier(this=cdf, quoted=False))), where=Where( this=GTE( this=Column( this=Identifier(this=p, quoted=False)), expression=Subquery( this=Select( expressions=[ Column( this=Identifier(this=r, quoted=False))], from_=From( this=Table( this=Identifier(this=rand, quoted=False))))))), with_=With( expressions=[ CTE( this=Select( expressions=[ Alias( this=Placeholder(this=random_expr), alias=Identifier(this=r, quoted=False))]), alias=TableAlias( this=Identifier(this=rand, quoted=False))), CTE( this=Select( expressions=[ Column( this=Identifier(this=i, quoted=False)), Alias( this=Div( this=Literal(this=1.0, is_string=False), expression=Pow( this=Column( this=Identifier(this=i, quoted=False)), expression=Placeholder(this=s)), typed=False, safe=False), alias=Identifier(this=w, quoted=False))], from_=From( this=Table( this=Anonymous( this=RANGE, expressions=[ Literal(this=1, is_string=False), Add( this=Placeholder(this=n), expression=Literal(this=1, is_string=False))]), alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=i, quoted=False)])))), alias=TableAlias( this=Identifier(this=weights, quoted=False))), CTE( this=Select( expressions=[ Column( this=Identifier(this=i, quoted=False)), Alias( this=Div( this=Window( this=Sum( this=Column( this=Identifier(this=w, quoted=False))), order=Order( expressions=[ Ordered( this=Column( this=Identifier(this=i, quoted=False)), nulls_first=True)]), over=OVER), expression=Window( this=Sum( this=Column( this=Identifier(this=w, quoted=False))), over=OVER), typed=False, safe=False), alias=Identifier(this=p, quoted=False))], from_=From( this=Table( this=Identifier(this=weights, quoted=False)))), alias=TableAlias( this=Identifier(this=cdf, quoted=False)))]))
NORMAL_TEMPLATE: sqlglot.expressions.core.Expr = Add( this=Placeholder(this=mean), expression=Paren( this=Mul( this=Mul( this=Placeholder(this=stddev), expression=Sqrt( this=Mul( this=Neg( this=Literal(this=2, is_string=False)), expression=Ln( this=Greatest( this=Placeholder(this=u1), expressions=[ Literal(this=1e-10, is_string=False)], ignore_nulls=True))))), expression=Cos( this=Mul( this=Mul( this=Literal(this=2, is_string=False), expression=Pi()), expression=Placeholder(this=u2))))))
SEEDED_RANDOM_TEMPLATE: sqlglot.expressions.core.Expr = Div( this=Paren( this=Mod( this=Abs( this=Anonymous( this=HASH, expressions=[ Placeholder(this=seed)])), expression=Literal(this=1000000, is_string=False))), expression=Literal(this=1000000.0, is_string=False), typed=False, safe=False)
SEQ_UNSIGNED: sqlglot.expressions.core.Expr = Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val))
SEQ_SIGNED: sqlglot.expressions.core.Expr = Paren( this=Case( ifs=[ If( this=GTE( this=Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val)), expression=Placeholder(this=half)), true=Sub( this=Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val)), expression=Placeholder(this=max_val)))], default=Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val))))
MAPCAT_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Or( this=Is( this=Placeholder(this=map1), expression=Null()), expression=Is( this=Placeholder(this=map2), expression=Null())), true=Null())], default=MapFromEntries( this=Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=LIST_TRANSFORM, expressions=[ Anonymous( this=LIST_DISTINCT, expressions=[ Anonymous( this=LIST_CONCAT, expressions=[ MapKeys( this=Placeholder(this=map1)), MapKeys( this=Placeholder(this=map2))])]), Lambda( this=Anonymous( this=STRUCT_PACK, expressions=[ PropertyEQ( this=Identifier(this=key, quoted=False), expression=Identifier(this=__k, quoted=False)), PropertyEQ( this=Identifier(this=value, quoted=False), expression=Coalesce( this=Bracket( this=Placeholder(this=map2), expressions=[ Identifier(this=__k, quoted=False)]), expressions=[ Bracket( this=Placeholder(this=map1), expressions=[ Identifier(this=__k, quoted=False)])]))]), expressions=[ Identifier(this=__k, quoted=False)])]), Lambda( this=Not( this=Is( this=Dot( this=Identifier(this=__x, quoted=False), expression=Identifier(this=value, quoted=False)), expression=Null())), expressions=[ Identifier(this=__x, quoted=False)])])))
EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {'WEEKISO': ('%V', 'INTEGER'), 'YEAROFWEEK': ('%G', 'INTEGER'), 'YEAROFWEEKISO': ('%G', 'INTEGER'), 'NANOSECOND': ('%n', 'BIGINT')}
EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {'EPOCH_SECOND': 'EPOCH', 'EPOCH_MILLISECOND': 'EPOCH_MS', 'EPOCH_MICROSECOND': 'EPOCH_US', 'EPOCH_NANOSECOND': 'EPOCH_NS'}
BITMAP_CONSTRUCT_AGG_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Case( ifs=[ If( this=Or( this=Is( this=Column( this=Identifier(this=l, quoted=False)), expression=Null()), expression=EQ( this=Length( this=Column( this=Identifier(this=l, quoted=False))), expression=Literal(this=0, is_string=False))), true=Null()), If( this=NEQ( this=Length( this=Column( this=Identifier(this=l, quoted=False))), expression=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Column( this=Identifier(this=l, quoted=False)), Lambda( this=Between( this=Identifier(this=__v, quoted=False), low=Literal(this=0, is_string=False), high=Literal(this=32767, is_string=False)), expressions=[ Identifier(this=__v, quoted=False)])]))), true=Null()), If( this=LT( this=Length( this=Column( this=Identifier(this=l, quoted=False))), expression=Literal(this=5, is_string=False)), true=Unhex( this=DPipe( this=DPipe( this=Anonymous( this=PRINTF, expressions=[ Literal(this='%04X', is_string=True), Length( this=Column( this=Identifier(this=l, quoted=False)))]), expression=Column( this=Identifier(this=h, quoted=False)), safe=True), expression=Repeat( this=Literal(this='00', is_string=True), times=Mul( this=Greatest( this=Literal(this=0, is_string=False), expressions=[ Sub( this=Literal(this=4, is_string=False), expression=Length( this=Column( this=Identifier(this=l, quoted=False))))], ignore_nulls=True), expression=Literal(this=2, is_string=False))), safe=True)))], default=Unhex( this=DPipe( this=Literal(this='08000000000000000000', is_string=True), expression=Column( this=Identifier(this=h, quoted=False)), safe=True)))], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=l, quoted=False)), Alias( this=Coalesce( this=Anonymous( this=LIST_REDUCE, expressions=[ Anonymous( this=LIST_TRANSFORM, expressions=[ Column( this=Identifier(this=l, quoted=False)), Lambda( this=Anonymous( this=PRINTF, expressions=[ Literal(this='%02X%02X', is_string=True), BitwiseAnd( 
this=Cast( this=Identifier(this=__x, quoted=False), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False)), expression=Literal(this=255, is_string=False)), BitwiseAnd( this=Paren( this=BitwiseRightShift( this=Cast( this=Identifier(this=__x, quoted=False), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False)), expression=Literal(this=8, is_string=False))), expression=Literal(this=255, is_string=False))]), expressions=[ Identifier(this=__x, quoted=False)])]), Lambda( this=DPipe( this=Identifier(this=__a, quoted=False), expression=Identifier(this=__b, quoted=False), safe=True), expressions=[ Identifier(this=__a, quoted=False), Identifier(this=__b, quoted=False)]), Literal(this='', is_string=True)]), expressions=[ Literal(this='', is_string=True)]), alias=Identifier(this=h, quoted=False))], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Anonymous( this=LIST_SORT, expressions=[ Anonymous( this=LIST_DISTINCT, expressions=[ Filter( this=List( expressions=[ Placeholder(this=arg)]), expression=Where( this=Not( this=Is( this=Placeholder(this=arg), expression=Null()))))])]), alias=Identifier(this=l, quoted=False))])))))))
RANDSTR_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Anonymous( this=LISTAGG, expressions=[ Substring( this=Literal(this='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', is_string=True), start=Add( this=Literal(this=1, is_string=False), expression=Cast( this=Floor( this=Mul( this=Column( this=Identifier(this=random_value, quoted=False)), expression=Literal(this=62, is_string=False))), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False))), length=Literal(this=1, is_string=False)), Literal(this='', is_string=True)])], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Div( this=Paren( this=Mod( this=Abs( this=Anonymous( this=HASH, expressions=[ Add( this=Column( this=Identifier(this=i, quoted=False)), expression=Placeholder(this=seed))])), expression=Literal(this=1000, is_string=False))), expression=Literal(this=1000.0, is_string=False), typed=False, safe=False), alias=Identifier(this=random_value, quoted=False))], from_=From( this=Table( this=Anonymous( this=RANGE, expressions=[ Placeholder(this=length)]), alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=i, quoted=False)])))))))
MINHASH_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ JSONObject( expressions=[ JSONKeyValue( this=Literal(this='state', is_string=True), expression=List( expressions=[ Order( this=Column( this=Identifier(this=min_h, quoted=False)), expressions=[ Ordered( this=Column( this=Identifier(this=seed, quoted=False)), nulls_first=True)])])), JSONKeyValue( this=Literal(this='type', is_string=True), expression=Literal(this='minhash', is_string=True)), JSONKeyValue( this=Literal(this='version', is_string=True), expression=Literal(this=1, is_string=False))], return_type=False, encoding=False)], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=seed, quoted=False)), Alias( this=Anonymous( this=LIST_MIN, expressions=[ Anonymous( this=LIST_TRANSFORM, expressions=[ Column( this=Identifier(this=vals, quoted=False)), Lambda( this=Anonymous( this=HASH, expressions=[ DPipe( this=Cast( this=Identifier(this=__v, quoted=False), to=DataType(this=DType.VARCHAR, nested=False), _type=DataType(this=DType.VARCHAR, nested=False)), expression=Cast( this=Column( this=Identifier(this=seed, quoted=False)), to=DataType(this=DType.VARCHAR, nested=False), _type=DataType(this=DType.VARCHAR, nested=False)), safe=True)]), expressions=[ Identifier(this=__v, quoted=False)])])]), alias=Identifier(this=min_h, quoted=False))], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=List( expressions=[ Placeholder(this=expr)]), alias=Identifier(this=vals, quoted=False))]))), joins=[ Join( this=Table( this=Anonymous( this=RANGE, expressions=[ Literal(this=0, is_string=False), Placeholder(this=k)]), alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=seed, quoted=False)])))]))))
MINHASH_COMBINE_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ JSONObject( expressions=[ JSONKeyValue( this=Literal(this='state', is_string=True), expression=List( expressions=[ Order( this=Column( this=Identifier(this=min_h, quoted=False)), expressions=[ Ordered( this=Column( this=Identifier(this=idx, quoted=False)), nulls_first=True)])])), JSONKeyValue( this=Literal(this='type', is_string=True), expression=Literal(this='minhash', is_string=True)), JSONKeyValue( this=Literal(this='version', is_string=True), expression=Literal(this=1, is_string=False))], return_type=False, encoding=False)], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Column( this=Identifier(this=pos, quoted=False)), alias=Identifier(this=idx, quoted=False)), Alias( this=Min( this=Column( this=Identifier(this=val, quoted=False))), alias=Identifier(this=min_h, quoted=False))], from_=From( this=Unnest( expressions=[ List( expressions=[ Placeholder(this=expr)])], alias=TableAlias( this=Identifier(this=_, quoted=False), columns=[ Identifier(this=sig, quoted=False)]), offset=False)), joins=[ Join( this=Unnest( expressions=[ Cast( this=JSONExtract( this=Column( this=Identifier(this=sig, quoted=False)), expression=JSONPath( expressions=[ JSONPathRoot(), JSONPathKey(this=state)]), only_json_types=False), to=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True), _type=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True))], alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=val, quoted=False)]), offset=Identifier(this=pos, quoted=False)))], group=Group( expressions=[ Column( this=Identifier(this=pos, quoted=False))])))))
APPROXIMATE_SIMILARITY_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Div( this=Cast( this=Sum( this=Case( ifs=[ If( this=EQ( this=Column( this=Identifier(this=num_distinct, quoted=False)), expression=Literal(this=1, is_string=False)), true=Literal(this=1, is_string=False))], default=Literal(this=0, is_string=False))), to=DataType(this=DType.DOUBLE, nested=False), _type=DataType(this=DType.DOUBLE, nested=False)), expression=Count( this=Star(), big_int=True), typed=False, safe=False)], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=pos, quoted=False)), Alias( this=Count( this=Distinct( expressions=[ Column( this=Identifier(this=h, quoted=False))]), big_int=True), alias=Identifier(this=num_distinct, quoted=False))], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=h, quoted=False)), Column( this=Identifier(this=pos, quoted=False))], from_=From( this=Unnest( expressions=[ List( expressions=[ Placeholder(this=expr)])], alias=TableAlias( this=Identifier(this=_, quoted=False), columns=[ Identifier(this=sig, quoted=False)]), offset=False)), joins=[ Join( this=Unnest( expressions=[ Cast( this=JSONExtract( this=Column( this=Identifier(this=sig, quoted=False)), expression=JSONPath( expressions=[ JSONPathRoot(), JSONPathKey(this=state)]), only_json_types=False), to=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True), _type=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True))], alias=TableAlias( this=Identifier(this=s, quoted=False), columns=[ Identifier(this=h, quoted=False)]), offset=Identifier(this=pos, quoted=False)))]))), group=Group( expressions=[ Column( this=Identifier(this=pos, quoted=False))])))))
ARRAYS_ZIP_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Placeholder(this=null_check), true=Null()), If( this=Placeholder(this=all_empty_check), true=Array( expressions=[ Placeholder(this=empty_struct)]))], default=Anonymous( this=LIST_TRANSFORM, expressions=[ Anonymous( this=RANGE, expressions=[ Literal(this=0, is_string=False), Placeholder(this=max_len)]), Lambda( this=Placeholder(this=transform_struct), expressions=[ Identifier(this=__i, quoted=False)])]))
ARRAY_BAG_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Or( this=Is( this=Placeholder(this=arr1), expression=Null()), expression=Is( this=Placeholder(this=arr2), expression=Null())), true=Null())], default=Anonymous( this=LIST_TRANSFORM, expressions=[ Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=LIST_ZIP, expressions=[ Placeholder(this=arr1), GenerateSeries( start=Literal(this=1, is_string=False), end=Length( this=Placeholder(this=arr1)))]), Lambda( this=Placeholder(this=cond), expressions=[ Identifier(this=pair, quoted=False)])]), Lambda( this=Bracket( this=Identifier(this=pair, quoted=False), expressions=[ Literal(this=0, is_string=False)]), expressions=[ Identifier(this=pair, quoted=False)])]))
ARRAY_EXCEPT_CONDITION: sqlglot.expressions.core.Expr = GT( this=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Bracket( this=Placeholder(this=arr1), expressions=[ Slice( this=Literal(this=1, is_string=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=1, is_string=False)]))]), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])), expression=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Placeholder(this=arr2), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])))
ARRAY_INTERSECTION_CONDITION: sqlglot.expressions.core.Expr = LTE( this=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Bracket( this=Placeholder(this=arr1), expressions=[ Slice( this=Literal(this=1, is_string=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=1, is_string=False)]))]), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])), expression=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Placeholder(this=arr2), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])))
ARRAY_EXCEPT_SET_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Or( this=Is( this=Placeholder(this=arr1), expression=Null()), expression=Is( this=Placeholder(this=arr2), expression=Null())), true=Null())], default=Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=LIST_DISTINCT, expressions=[ Placeholder(this=arr1)]), Lambda( this=EQ( this=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Placeholder(this=arr2), Lambda( this=NullSafeEQ( this=Identifier(this=x, quoted=False), expression=Identifier(this=e, quoted=False)), expressions=[ Identifier(this=x, quoted=False)])])), expression=Literal(this=0, is_string=False)), expressions=[ Identifier(this=e, quoted=False)])]))
STRTOK_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=And( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), expression=EQ( this=Placeholder(this=string), expression=Literal(this='', is_string=True))), true=Null()), If( this=And( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), expression=EQ( this=Placeholder(this=part_index), expression=Literal(this=1, is_string=False))), true=Placeholder(this=string)), If( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), true=Null()), If( this=LT( this=Placeholder(this=part_index), expression=Literal(this=0, is_string=False)), true=Null()), If( this=Or( this=Or( this=Is( this=Placeholder(this=string), expression=Null()), expression=Is( this=Placeholder(this=delimiter), expression=Null())), expression=Is( this=Placeholder(this=part_index), expression=Null())), true=Null())], default=Placeholder(this=base_func))
def timeslice_sql(self, expression: exp.TimeSlice) -> str:
    """
    Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.

    Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
    DuckDB:    time_bucket(INTERVAL 'slice_length' UNIT, date_expr)

    For 'END' kind, add the interval to get the end of the slice.
    For DATE type with 'END', cast result back to DATE to preserve type.
    """
    date_expr = expression.this
    slice_length = expression.expression
    unit = expression.unit
    kind = expression.text("kind").upper()

    # Create INTERVAL expression: INTERVAL 'N' UNIT
    interval_expr = exp.Interval(this=slice_length, unit=unit)

    # Create base time_bucket expression
    time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)

    # Default ('START' or unspecified): the bucket start is the answer.
    # Idiom fix: use `!=` rather than `not ... ==`.
    if kind != "END":
        return self.sql(time_bucket_expr)

    # For 'END', add the interval to get the end of the slice
    add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())

    # DuckDB converts DATE to TIMESTAMP when adding intervals, so cast the
    # result back to DATE to preserve the input type.
    if date_expr.is_type(exp.DType.DATE):
        return self.sql(exp.cast(add_expr, exp.DType.DATE))

    return self.sql(add_expr)

Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.

Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END']) DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr)

For 'END' kind, add the interval to get the end of the slice. For DATE type with 'END', cast result back to DATE to preserve type.

def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
    """
    Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

    Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
    - Each bucket covers 32,768 values
    - Bucket numbering starts at 1
    - Formula: ((value - 1) // 32768) + 1 for positive values

    For non-positive values (0 and negative), we use value // 32768 so bucket 0
    or positive bucket IDs are never produced for negative inputs.
    """
    value = expression.this

    # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
    bucket_if_positive = ((value - 1) // 32768) + 1
    bucket_otherwise = value // 32768

    return self.sql(
        exp.case()
        .when(exp.GT(this=value, expression=exp.Literal.number(0)), bucket_if_positive)
        .else_(bucket_otherwise)
    )

Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:

  • Each bucket covers 32,768 values
  • Bucket numbering starts at 1
  • Formula: ((value - 1) // 32768) + 1 for positive values

For non-positive values (0 and negative), we use value // 32768 to avoid producing bucket 0 or positive bucket IDs for negative inputs.

def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
    """
    Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

    Snowflake's BITMAP_BIT_POSITION behavior:
    - For n <= 0: returns ABS(n) % 32768
    - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
    """
    this = expression.this

    # (CASE WHEN n > 0 THEN n - 1 ELSE ABS(n) END) % MAX_BIT_POSITION
    adjusted = exp.If(
        this=exp.GT(this=this, expression=exp.Literal.number(0)),
        true=this - exp.Literal.number(1),
        false=exp.Abs(this=this),
    )
    return self.sql(
        exp.Mod(this=exp.Paren(this=adjusted), expression=MAX_BIT_POSITION)
    )

Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

Snowflake's BITMAP_BIT_POSITION behavior:

  • For n <= 0: returns ABS(n) % 32768
  • For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
    """
    Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    Snowflake bitmap format:
    - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
    - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
    """
    filled = exp.replace_placeholders(
        self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=expression.this
    )
    return f"({self.sql(filled)})"

Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent. Uses a pre-parsed template with placeholders replaced by expression nodes.

Snowflake bitmap format:

  • Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
  • Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
def compress_sql(self, expression: exp.Compress) -> str:
    # DuckDB has no COMPRESS(); warn, then emit the call verbatim as a fallback.
    self.unsupported("DuckDB does not support the COMPRESS() function")
    return self.function_fallback_sql(expression)
def encrypt_sql(self, expression: exp.Encrypt) -> str:
    # No DuckDB equivalent exists; warn and fall back to a plain function call.
    self.unsupported("ENCRYPT is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def decrypt_sql(self, expression: exp.Decrypt) -> str:
    # Warn using the exact name the user wrote (TRY_ variant when safe is set).
    name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
    self.unsupported(f"{name} is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
    # Warn using the exact name the user wrote (TRY_ variant when safe is set).
    name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
    self.unsupported(f"{name} is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
    # No DuckDB equivalent exists; warn and fall back to a plain function call.
    self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def parseurl_sql(self, expression: exp.ParseUrl) -> str:
    # No DuckDB equivalent exists; warn and fall back to a plain function call.
    self.unsupported("PARSE_URL is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def parseip_sql(self, expression: exp.ParseIp) -> str:
    # No DuckDB equivalent exists; warn and fall back to a plain function call.
    self.unsupported("PARSE_IP is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
    """Render via DuckDB's JARO_WINKLER_SIMILARITY function."""
    left = expression.this
    right = expression.expression

    # Case-insensitive mode compares upper-cased operands.
    if expression.args.get("case_insensitive"):
        left = exp.Upper(this=left)
        right = exp.Upper(this=right)

    result = exp.func("JARO_WINKLER_SIMILARITY", left, right)

    # integer_scale: scale the similarity by 100 and cast to INTEGER.
    if expression.args.get("integer_scale"):
        result = exp.cast(result * 100, "INTEGER")

    return self.sql(result)
def nthvalue_sql(self, expression: exp.NthValue) -> str:
    # DuckDB only counts from the first row; warn when FROM LAST was requested.
    if not expression.args.get("from_first", True):
        self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end ")

    return self.function_fallback_sql(expression)
def randstr_sql(self, expression: exp.Randstr) -> str:
    """
    Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    RANDSTR(length, generator) generates a random string of specified length.
    - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
    - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
    - No generator: Use default seed value
    """
    generator = expression.args.get("generator")

    if generator is None:
        # No generator specified: arbitrary but deterministic default seed.
        seed_value: exp.Expr = exp.Literal.number(RANDSTR_SEED)
    elif isinstance(generator, exp.Rand):
        # RANDOM([seed]): prefer its explicit seed, otherwise the call itself.
        seed_value = generator.this or generator
    else:
        # Constant or other expression, used directly as the seed.
        seed_value = generator

    filled = exp.replace_placeholders(
        self.RANDSTR_TEMPLATE, seed=seed_value, length=expression.this
    )
    return f"({self.sql(filled)})"

Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random. Uses a pre-parsed template with placeholders replaced by expression nodes.

RANDSTR(length, generator) generates a random string of specified length.

  • With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
  • With RANDOM(): Use RANDOM() in the hash for non-deterministic output
  • No generator: Use default seed value
@unsupported_args("finish")
def reduce_sql(self, expression: exp.Reduce) -> str:
    """Render REDUCE as DuckDB's list_reduce(list, lambda[, initial])."""
    merge_lambda = expression.args.get("merge")

    # list_reduce expects the LAMBDA a, b: ... (colon) lambda syntax.
    if merge_lambda:
        merge_lambda.set("colon", True)

    return self.func(
        "list_reduce", expression.this, merge_lambda, expression.args.get("initial")
    )
def zipf_sql(self, expression: exp.Zipf) -> str:
    """
    Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
    Uses a pre-parsed template with placeholders replaced by expression nodes.
    """
    gen = expression.args["gen"]

    if isinstance(gen, exp.Rand):
        # Non-deterministic output: sample with RANDOM().
        random_expr: exp.Expr = exp.Rand()
    else:
        # Deterministic seed: (ABS(HASH(seed)) % 1000000) / 1000000.0
        random_expr = exp.Div(
            this=exp.Paren(
                this=exp.Mod(
                    this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
                    expression=exp.Literal.number(1000000),
                )
            ),
            expression=exp.Literal.number(1000000.0),
        )

    filled = exp.replace_placeholders(
        self.ZIPF_TEMPLATE,
        s=expression.this,
        n=expression.args["elementcount"],
        random_expr=random_expr,
    )
    return f"({self.sql(filled)})"

Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling. Uses a pre-parsed template with placeholders replaced by expression nodes.

def tobinary_sql(self, expression: exp.ToBinary) -> str:
    """
    TO_BINARY and TRY_TO_BINARY transpilation:
    - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
    - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
    - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')

    For TRY_TO_BINARY (safe=True), wrap with TRY():
    - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
    """
    value = expression.this
    format_arg = expression.args.get("format")
    is_safe = expression.args.get("safe")
    is_binary = _is_binary(expression)

    if not format_arg and not is_binary:
        return self.func("TRY_TO_BINARY" if is_safe else "TO_BINARY", value)

    # Snowflake defaults to HEX encoding when no format is specified
    fmt = format_arg.name.upper() if format_arg else "HEX"

    if fmt in ("UTF-8", "UTF8"):
        # DuckDB's ENCODE always uses UTF-8; no charset parameter exists
        result = self.func("ENCODE", value)
    elif fmt == "BASE64":
        result = self.func("FROM_BASE64", value)
    elif fmt == "HEX":
        result = self.func("UNHEX", value)
    elif is_safe:
        # TRY_TO_BINARY with an unrecognized format yields NULL
        return self.sql(exp.null())
    else:
        self.unsupported(f"format {fmt} is not supported")
        result = self.func("TO_BINARY", value)

    return f"TRY({result})" if is_safe else result

TO_BINARY and TRY_TO_BINARY transpilation:

  • 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
  • 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
  • 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')

For TRY_TO_BINARY (safe=True), wrap with TRY():

  • 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
def tonumber_sql(self, expression: exp.ToNumber) -> str:
    """TO_NUMBER with precision/scale but no format becomes a DECIMAL cast."""
    fmt = expression.args.get("format")
    precision = expression.args.get("precision")
    scale = expression.args.get("scale")

    if fmt or not (precision and scale):
        return super().tonumber_sql(expression)

    decimal_type = f"DECIMAL({precision.name}, {scale.name})"
    return self.sql(exp.cast(expression.this, decimal_type, dialect="duckdb"))
def generator_sql(self, expression: exp.Generator) -> str:
    """Transpile Snowflake GENERATOR to DuckDB range()."""
    rowcount = expression.args.get("rowcount")

    # TIMELIMIT has no DuckDB counterpart.
    if expression.args.get("time_limit"):
        self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")

    if rowcount:
        return self.func("range", rowcount)

    # Without ROWCOUNT there is nothing to enumerate; emit an empty range.
    self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
    return self.func("range", exp.Literal.number(0))
def greatest_sql(self, expression: exp.Greatest) -> str:
    # Delegates to the shared GREATEST/LEAST renderer.
    return self._greatest_least_sql(expression)
def least_sql(self, expression: exp.Least) -> str:
    # Delegates to the shared GREATEST/LEAST renderer.
    return self._greatest_least_sql(expression)
def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
    """Render a lambda; 'colon'-flagged lambdas use DuckDB's LAMBDA x: ... form."""
    colon_style = expression.args.get("colon")
    if colon_style:
        arrow_sep = ":"
        wrap = False

    rendered = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
    return f"LAMBDA {rendered}" if colon_style else rendered
def show_sql(self, expression: exp.Show) -> str:
    """Render SHOW <name> [FROM <scope>]."""
    scope = self.sql(expression, "from_")
    suffix = f" FROM {scope}" if scope else ""
    return f"SHOW {expression.name}{suffix}"
def soundex_sql(self, expression: exp.Soundex) -> str:
    # No DuckDB equivalent; warn, then emit the call anyway.
    self.unsupported("SOUNDEX is not supported in DuckDB")
    return self.func("SOUNDEX", expression.this)
def sortarray_sql(self, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY via LIST_SORT / ARRAY_REVERSE_SORT, folding literal flags."""
    arr = expression.this
    asc = expression.args.get("asc")
    nulls_first = expression.args.get("nulls_first")

    asc_is_literal = isinstance(asc, exp.Boolean)
    nulls_is_literal = isinstance(nulls_first, exp.Boolean)

    # Neither flag is a literal boolean: pass both through untouched.
    if not asc_is_literal and not nulls_is_literal:
        return self.func("LIST_SORT", arr, asc, nulls_first)

    nulls_are_first = nulls_first == exp.true()
    nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

    # Only the NULLS placement is a literal: translate it, keep asc dynamic.
    if not asc_is_literal:
        return self.func("LIST_SORT", arr, asc, nulls_first_sql)

    descending = asc == exp.false()

    # Both literal: pick the most compact equivalent call.
    if not nulls_are_first:
        if descending:
            return self.func("ARRAY_REVERSE_SORT", arr)
        return self.func("LIST_SORT", arr)

    return self.func(
        "LIST_SORT",
        arr,
        exp.Literal.string("DESC" if descending else "ASC"),
        exp.Literal.string("NULLS FIRST"),
    )
def install_sql(self, expression: exp.Install) -> str:
    """Render [FORCE] INSTALL <extension> [FROM <source>]."""
    force = "FORCE " if expression.args.get("force") else ""
    this = self.sql(expression, "this")
    source = expression.args.get("from_")
    source = f" FROM {source}" if source else ""
    return f"{force}INSTALL {this}{source}"
def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
    # No faithful translation exists; warn, then emit the call verbatim.
    self.unsupported(
        "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
    )
    return self.function_fallback_sql(expression)
def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
    # An ISO-8601 string casts directly to TIMESTAMPTZ in DuckDB.
    return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))
def strposition_sql(self, expression: exp.StrPosition) -> str:
    """Render STRPOS/POSITION, with BLOB handling and optional position clamping."""
    haystack = expression.this
    needle = expression.args.get("substr")
    position = expression.args.get("position")

    # DuckDB's STRPOS doesn't support BLOB types: search the HEX encodings
    # instead, then map the hex offset back to a byte offset.
    if _is_binary(haystack):
        # STRPOS(HEX(haystack), HEX(needle))
        hex_strpos = exp.StrPosition(
            this=exp.Hex(this=haystack),
            substr=exp.Hex(this=needle),
        )
        return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

    # For VARCHAR: clamp non-positive start positions to 1 when requested.
    if expression.args.get("clamp_position") and position:
        expression = expression.copy()
        expression.set(
            "position",
            exp.If(
                this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                true=exp.Literal.number(1),
                false=position.copy(),
            ),
        )

    return strposition_sql(self, expression)
def substring_sql(self, expression: exp.Substring) -> str:
    """
    Render SUBSTRING; zero-start dialects map index 0 to 1 and negative
    lengths to 0 before delegating to DuckDB's SUBSTRING.
    """
    if expression.args.get("zero_start"):
        # The previous version fetched start/length once and then immediately
        # re-fetched them via walrus assignments; the first lookups were dead.
        start = expression.args.get("start")
        length = expression.args.get("length")

        if start:
            # A 0 start means "beginning of string" -> DuckDB's 1.
            start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
        if length:
            # Negative lengths yield the empty string -> length 0.
            length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)

        return self.func("SUBSTRING", expression.this, start, length)

    return self.function_fallback_sql(expression)
def strtotime_sql(self, expression: exp.StrToTime) -> str:
    """Render STR_TO_TIME, honoring TZ-aware target types and the safe flag."""
    # LTZ/TZ target types must surface as TIMESTAMPTZ in DuckDB.
    target_type = expression.args.get("target_type")
    needs_tz = target_type and target_type.this in (
        exp.DType.TIMESTAMPLTZ,
        exp.DType.TIMESTAMPTZ,
    )

    # Safe variant: TRY_STRPTIME, then cast to the appropriate timestamp type.
    if expression.args.get("safe"):
        fmt = self.format_time(expression)
        cast_to = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
        parsed = self.func("TRY_STRPTIME", expression.this, fmt)
        return self.sql(exp.cast(parsed, cast_to))

    base_sql = str_to_time_sql(self, expression)
    if not needs_tz:
        return base_sql

    return self.sql(
        exp.cast(base_sql, exp.DataType(this=exp.DType.TIMESTAMPTZ))
    )
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Parse a string to DATE via (TRY_)STRPTIME followed by a cast."""
    parser = "TRY_STRPTIME" if expression.args.get("safe") else "STRPTIME"
    parsed = self.func(parser, expression.this, self.format_time(expression))
    return self.sql(exp.cast(parsed, exp.DataType(this=exp.DType.DATE)))
def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
    """Convert a timestamp-or-string value to TIME (TRY_ variants when safe)."""
    this = expression.this
    fmt = self.format_time(expression)
    safe = expression.args.get("safe")
    time_type = exp.DataType.build("TIME", dialect="duckdb")
    cast_cls = exp.TryCast if safe else exp.Cast

    # With a format string: parse via (TRY_)STRPTIME, then cast to TIME.
    if fmt:
        parse = exp.Anonymous(
            this="TRY_STRPTIME" if safe else "STRPTIME", expressions=[this, fmt]
        )
        return self.sql(cast_cls(this=parse, to=time_type))

    # Already TIME-valued: no cast needed.
    if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
        return self.sql(this)

    return self.sql(cast_cls(this=this, to=time_type))
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE, or the current date in an explicit time zone."""
    zone = expression.this
    if not zone:
        return "CURRENT_DATE"

    # CAST(CURRENT_TIMESTAMP AT TIME ZONE <zone> AS DATE)
    return self.sql(
        exp.Cast(
            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=zone),
            to=exp.DataType(this=exp.DType.DATE),
        )
    )
def checkjson_sql(self, expression: exp.CheckJson) -> str:
    """CHECK_JSON: NULL for NULL/empty/valid input, 'Invalid JSON' otherwise."""
    arg = expression.this
    ok = exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg))
    return self.sql(
        exp.case().when(ok, exp.null()).else_(exp.Literal.string("Invalid JSON"))
    )
def parsejson_sql(self, expression: exp.ParseJSON) -> str:
    """PARSE_JSON / TRY_PARSE_JSON; the safe form yields NULL on invalid input."""
    arg = expression.this
    if not expression.args.get("safe"):
        return self.func("JSON", arg)

    return self.sql(
        exp.case()
        .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
        .else_(exp.null())
    )
def unicode_sql(self, expression: exp.Unicode) -> str:
    """Render UNICODE(s); optionally maps the empty string to 0."""
    this = expression.this
    if not expression.args.get("empty_is_zero"):
        return self.func("UNICODE", this)

    return self.sql(
        exp.case()
        .when(this.eq(exp.Literal.string("")), exp.Literal.number(0))
        .else_(exp.Anonymous(this="UNICODE", expressions=[this]))
    )
def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str:
    """Map a JSON null value to SQL NULL; pass everything else through."""
    this = expression.this
    is_json_null = exp.func("json_type", this).eq("NULL")
    return self.sql(exp.case().when(is_json_null, exp.null()).else_(this))
def trunc_sql(self, expression: exp.Trunc) -> str:
    """Render TRUNC, casting a non-INT decimals argument to INT when allowed."""
    decimals = expression.args.get("decimals")
    needs_int_cast = (
        expression.args.get("fractions_supported")
        and decimals
        and not decimals.is_type(exp.DType.INT)
    )
    if needs_int_cast:
        decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")

    return self.func("TRUNC", expression.this, decimals)
def normal_sql(self, expression: exp.Normal) -> str:
    """
    Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

    Uses the Box-Muller transform via NORMAL_TEMPLATE.
    """
    gen: exp.Expr = expression.args["gen"]

    # Box-Muller needs two independent uniform values in [0, 1).
    if isinstance(gen, exp.Rand) and gen.this is None:
        u1: exp.Expr = exp.Rand()
        u2: exp.Expr = exp.Rand()
    else:
        # Seeded: derive two values by hashing seed and seed + 1.
        seed = gen.this if isinstance(gen, exp.Rand) else gen
        u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
        u2 = exp.replace_placeholders(
            self.SEEDED_RANDOM_TEMPLATE,
            seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
        )

    return self.sql(
        exp.replace_placeholders(
            self.NORMAL_TEMPLATE,
            mean=expression.this,
            stddev=expression.args["stddev"],
            u1=u1,
            u2=u2,
        )
    )

Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

Uses the Box-Muller transform via NORMAL_TEMPLATE.

def uniform_sql(self, expression: exp.Uniform) -> str:
    """
    Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

    UNIFORM returns a random value in [min, max]:
    - Integer result if both min and max are integers
    - Float result if either min or max is a float
    """
    lo = expression.this
    hi = expression.expression
    gen = expression.args.get("gen")

    # Emulate Snowflake's typing: INT bounds -> INT result, else FLOAT.
    int_result = lo.is_int and hi.is_int

    # Uniform value in [0, 1): RANDOM(), or a hash-derived value for seeds.
    if isinstance(gen, exp.Rand):
        rnd: exp.Expr = exp.Rand()
    else:
        # (ABS(HASH(seed)) % 1000000) / 1000000.0
        rnd = exp.Div(
            this=exp.Paren(
                this=exp.Mod(
                    this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                    expression=exp.Literal.number(1000000),
                )
            ),
            expression=exp.Literal.number(1000000.0),
        )

    # min + rnd * (max - min [+ 1 for integer results])
    span: exp.Expr = exp.Sub(this=hi, expression=lo)
    if int_result:
        span = exp.Add(this=span, expression=exp.Literal.number(1))

    result: exp.Expr = exp.Add(
        this=lo,
        expression=exp.Mul(this=rnd, expression=exp.Paren(this=span)),
    )

    if int_result:
        result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

    return self.sql(result)

Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

UNIFORM returns a random value in [min, max]:

  • Integer result if both min and max are integers
  • Float result if either min or max is a float
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """Transpile TIME_FROM_PARTS to DuckDB.

        Emits MAKE_TIME when the hour/minute/second components are integer
        literals in the normal ranges; otherwise (overflow allowed, or a
        nanosecond component present) rebuilds the time via INTERVAL
        arithmetic on TIME '00:00:00'.
        """
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                # Fold nanoseconds into seconds; pop() detaches the arg so it is
                # not also rendered as a separate MAKE_TIME argument.
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            # Rebuild the time as TIME '00:00:00' + INTERVAL <total_seconds> SECOND.
            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME
        if nano:
            # Fold the detached nanosecond component into the seconds argument.
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies server timezone while DuckDB uses local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        # Specifiers with a strftime-format equivalent (class-level mapping).
        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
            # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.build(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            # STRFTIME yields text; cast back to the numeric type from the mapping.
            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.build(cast_type, dialect="duckdb"),
                )
            )

        # EPOCH_* specifiers map to dedicated DuckDB functions (class-level mapping).
        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns float, cast to BIGINT for integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.build("BIGINT", dialect="duckdb"))
            return self.sql(result)

        # Everything else is natively supported; defer to the base generator.
        return super().extract_sql(expression)
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """Transpile TIMESTAMP_FROM_PARTS to DuckDB.

        Handles both the (date_expr, time_expr) form (rendered as DATE + TIME)
        and the component form (rendered as MAKE_TIMESTAMP, with milli/nano
        components folded into the seconds argument).
        """
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        # pop() detaches milli/nano so they are not rendered as extra arguments.
        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
@unsupported_args('nano')
def timestampltzfromparts_sql( self, expression: sqlglot.expressions.temporal.TimestampLtzFromParts) -> str:
2895    @unsupported_args("nano")
2896    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
2897        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2898        if nano := expression.args.get("nano"):
2899            nano.pop()
2900
2901        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2902        return f"CAST({timestamp} AS TIMESTAMPTZ)"
@unsupported_args('nano')
def timestamptzfromparts_sql( self, expression: sqlglot.expressions.temporal.TimestampTzFromParts) -> str:
2904    @unsupported_args("nano")
2905    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
2906        # Extract zone before popping
2907        zone = expression.args.get("zone")
2908        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2909        if zone:
2910            zone = zone.pop()
2911
2912        if nano := expression.args.get("nano"):
2913            nano.pop()
2914
2915        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2916
2917        if zone:
2918            # Use AT TIME ZONE to apply the explicit timezone
2919            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"
2920
2921        return timestamp
def tablesample_sql( self, expression: sqlglot.expressions.query.TableSample, tablesample_keyword: str | None = None) -> str:
2923    def tablesample_sql(
2924        self,
2925        expression: exp.TableSample,
2926        tablesample_keyword: str | None = None,
2927    ) -> str:
2928        if not isinstance(expression.parent, exp.Select):
2929            # This sample clause only applies to a single source, not the entire resulting relation
2930            tablesample_keyword = "TABLESAMPLE"
2931
2932        if expression.args.get("size"):
2933            method = expression.args.get("method")
2934            if method and method.name.upper() != "RESERVOIR":
2935                self.unsupported(
2936                    f"Sampling method {method} is not supported with a discrete sample count, "
2937                    "defaulting to reservoir sampling"
2938                )
2939                expression.set("method", exp.var("RESERVOIR"))
2940
2941        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
def join_sql(self, expression: sqlglot.expressions.query.Join) -> str:
2943    def join_sql(self, expression: exp.Join) -> str:
2944        if (
2945            not expression.args.get("using")
2946            and not expression.args.get("on")
2947            and not expression.method
2948            and (expression.kind in ("", "INNER", "OUTER"))
2949        ):
2950            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
2951            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
2952            if isinstance(expression.this, exp.Unnest):
2953                return super().join_sql(expression.on(exp.true()))
2954
2955            expression.set("side", None)
2956            expression.set("kind", None)
2957
2958        return super().join_sql(expression)
def countif_sql(self, expression: sqlglot.expressions.aggregate.CountIf) -> str:
2960    def countif_sql(self, expression: exp.CountIf) -> str:
2961        if self.dialect.version >= (1, 2):
2962            return self.function_fallback_sql(expression)
2963
2964        # https://github.com/tobymao/sqlglot/pull/4749
2965        return count_if_to_sum(self, expression)
    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render bracket indexing, accounting for DuckDB 1.2 breaking changes."""
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            # Pre-1.2: parenthesize an array literal before indexing into it.
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                # Type may be unannotated; infer it so the MAP check below works.
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                # Pre-1.2 MAP indexing yields a list; take its first element.
                bracket = f"({bracket})[1]"

        return bracket
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """Render WITHIN GROUP, relocating ORDER BY where DuckDB expects it.

        ARRAY_AGG gets the ORDER BY moved inside the function call; ordered-set
        aggregates (PERCENTILES) get the order key swapped into the first
        argument position.
        """
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the function's closing paren so the order clause can be spliced in.
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
    def length_sql(self, expression: exp.Length) -> str:
        """Render LENGTH, resolving binary args to OCTET_LENGTH where possible.

        When the arg may be binary and its type cannot be determined statically,
        emits a CASE on TYPEOF that picks OCTET_LENGTH for BLOBs and LENGTH for
        everything else.
        """
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            # Type not annotated yet; infer it before the TEXT_TYPES check.
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)
def bitlength_sql(self, expression: sqlglot.expressions.string.BitLength) -> str:
3058    def bitlength_sql(self, expression: exp.BitLength) -> str:
3059        if not _is_binary(arg := expression.this):
3060            return self.func("BIT_LENGTH", arg)
3061
3062        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
3063        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))
def chr_sql( self, expression: sqlglot.expressions.string.Chr, name: str = 'CHR') -> str:
3065    def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str:
3066        arg = expression.expressions[0]
3067        if arg.is_type(*exp.DataType.REAL_TYPES):
3068            arg = exp.cast(arg, exp.DType.INT)
3069        return self.func("CHR", arg)
def collation_sql(self, expression: sqlglot.expressions.functions.Collation) -> str:
3071    def collation_sql(self, expression: exp.Collation) -> str:
3072        self.unsupported("COLLATION function is not supported by DuckDB")
3073        return self.function_fallback_sql(expression)
    def collate_sql(self, expression: exp.Collate) -> str:
        """Map a Snowflake collation specifier string onto DuckDB's dotted form.

        Default specifiers are dropped; specifiers with no DuckDB equivalent
        emit an unsupported warning but are still carried through. When nothing
        remains, the COLLATE clause is omitted entirely.
        """
        if not expression.expression.is_string:
            return super().collate_sql(expression)

        raw = expression.expression.name
        if not raw:
            # Empty specifier: drop the COLLATE clause.
            return self.sql(expression.this)

        parts = []
        # Snowflake specifiers are hyphen-separated, e.g. 'en-ci-pi'.
        for part in raw.split("-"):
            lower = part.lower()
            if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
                if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
                    self.unsupported(
                        f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
                    )
                parts.append(lower)

        if not parts:
            # Everything was a default specifier: no COLLATE needed.
            return self.sql(expression.this)
        return super().collate_sql(
            exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
        )
    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
        """Transpile REGEXP_COUNT as LENGTH(REGEXP_EXTRACT_ALL(...)).

        A `position` arg is applied by substringing the subject; flags are
        embedded into the pattern; empty patterns return 0 via a CASE guard.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        parameters = expression.args.get("parameters")

        # Validate flags - only "ims" flags are supported for embedded patterns
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")

        if position:
            # Start matching at `position` by trimming the subject's prefix.
            this = exp.Substring(this=this, start=position)

        # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument)
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character
        result = (
            exp.case()
            .when(
                exp.EQ(this=pattern, expression=exp.Literal.string("")),
                exp.Literal.number(0),
            )
            .else_(
                exp.Length(
                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
                )
            )
        )

        return self.sql(result)
    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
        """Transpile REGEXP_REPLACE, emulating position/occurrence semantics.

        Only literal position/occurrence are supported. occurrence=0 implies
        replace-all (the 'g' flag); position > 1 is emulated by splitting the
        subject and concatenating the untouched prefix back on.
        """
        subject = expression.this
        pattern = expression.expression
        replacement = expression.args.get("replacement") or exp.Literal.string("")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        modifiers = expression.args.get("modifiers")

        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""

        # Handle occurrence (only literals supported)
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # flag duckdb to do either all or none, single_replace check is for duckdb round trip
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            # Split at `pos`: the prefix is preserved verbatim, replacement runs
            # only on the remainder.
            pos = position.to_py()
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)
def regexplike_sql(self, expression: sqlglot.expressions.core.RegexpLike) -> str:
3212    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
3213        this = expression.this
3214        pattern = expression.expression
3215        flag = expression.args.get("flag")
3216
3217        if expression.args.get("full_match"):
3218            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
3219            flag = exp.Literal.string(validated_flags) if validated_flags else None
3220            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)
3221
3222        return self.func("REGEXP_MATCHES", this, pattern, flag)
@unsupported_args('ins_cost', 'del_cost', 'sub_cost')
def levenshtein_sql(self, expression: sqlglot.expressions.string.Levenshtein) -> str:
3224    @unsupported_args("ins_cost", "del_cost", "sub_cost")
3225    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
3226        this = expression.this
3227        expr = expression.expression
3228        max_dist = expression.args.get("max_dist")
3229
3230        if max_dist is None:
3231            return self.func("LEVENSHTEIN", this, expr)
3232
3233        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
3234        levenshtein = exp.Levenshtein(this=this, expression=expr)
3235        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
    def pad_sql(self, expression: exp.Pad) -> str:
        """
        Handle RPAD/LPAD for VARCHAR and BINARY types.

        For VARCHAR: Delegate to parent class
        For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))
        """
        string_arg = expression.this
        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")

        if _is_binary(string_arg) or _is_binary(fill_arg):
            length_arg = expression.expression
            is_left = expression.args.get("is_left")

            # Number of fill repetitions needed, clamped at zero for inputs
            # already at or beyond the target length.
            input_len = exp.ByteLength(this=string_arg)
            chars_needed = length_arg - input_len
            pad_count = exp.Greatest(
                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
            )
            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)

            # LPAD puts the fill before the input; RPAD after.
            left, right = string_arg, repeat_expr
            if is_left:
                left, right = right, left

            result = exp.DPipe(this=left, expression=right)
            return self.sql(result)

        # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
        return super().pad_sql(expression)
def minhash_sql(self, expression: sqlglot.expressions.aggregate.Minhash) -> str:
3268    def minhash_sql(self, expression: exp.Minhash) -> str:
3269        k = expression.this
3270        exprs = expression.expressions
3271
3272        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
3273            self.unsupported(
3274                "MINHASH with multiple expressions or * requires manual query restructuring"
3275            )
3276            return self.func("MINHASH", k, *exprs)
3277
3278        expr = exprs[0]
3279        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
3280        return f"({self.sql(result)})"
def minhashcombine_sql(self, expression: sqlglot.expressions.aggregate.MinhashCombine) -> str:
3282    def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
3283        expr = expression.this
3284        result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
3285        return f"({self.sql(result)})"
def approximatesimilarity_sql( self, expression: sqlglot.expressions.aggregate.ApproximateSimilarity) -> str:
3287    def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
3288        expr = expression.this
3289        result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr)
3290        return f"({self.sql(result)})"
def arrayuniqueagg_sql(self, expression: sqlglot.expressions.aggregate.ArrayUniqueAgg) -> str:
3292    def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str:
3293        return self.sql(
3294            exp.Filter(
3295                this=exp.func("LIST", exp.Distinct(expressions=[expression.this])),
3296                expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()),
3297            )
3298        )
def arrayunionagg_sql(self, expression: sqlglot.expressions.aggregate.ArrayUnionAgg) -> str:
3300    def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str:
3301        self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB")
3302        return self.function_fallback_sql(expression)
    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
        """Render ARRAY_DISTINCT as LIST_DISTINCT, optionally keeping one NULL.

        When `check_null` is set, arrays that contained NULLs get a single NULL
        re-appended after deduplication (detected by comparing the array's full
        size against LIST_COUNT — presumably its non-NULL count; verify against
        DuckDB semantics).
        """
        arr = expression.this
        func = self.func("LIST_DISTINCT", arr)

        if expression.args.get("check_null"):
            # Deduplicate the compacted (NULL-free) array, then append one NULL.
            add_null_to_array = exp.func(
                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
            )
            return self.sql(
                exp.If(
                    this=exp.NEQ(
                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
                    ),
                    true=add_null_to_array,
                    false=func,
                )
            )

        return func
def arrayintersect_sql(self, expression: sqlglot.expressions.array.ArrayIntersect) -> str:
3324    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
3325        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
3326            return self._array_bag_sql(
3327                self.ARRAY_INTERSECTION_CONDITION,
3328                expression.expressions[0],
3329                expression.expressions[1],
3330            )
3331        return self.function_fallback_sql(expression)
def arrayexcept_sql(self, expression: sqlglot.expressions.array.ArrayExcept) -> str:
3333    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
3334        arr1, arr2 = expression.this, expression.expression
3335        if expression.args.get("is_multiset"):
3336            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
3337        return self.sql(
3338            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
3339        )
    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
        """
        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
        expressions that adjust the index at query time:
          - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
          - end:   CASE WHEN end < 0 THEN end - 1 ELSE end END
        """
        start, end = expression.args.get("start"), expression.args.get("end")

        if expression.args.get("zero_based"):
            if start is not None:
                # copy() is needed because the same node appears in both the
                # WHEN condition and the THEN branch.
                start = (
                    exp.case()
                    .when(
                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(start)
                )
            if end is not None:
                end = (
                    exp.case()
                    .when(
                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(end)
                )

        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))
    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
        """Transpile ARRAYS_ZIP via the ARRAYS_ZIP template query.

        Builds the template's placeholder expressions: the max input length,
        an empty struct with the output schema, the per-index struct used in
        the transform, and NULL/empty guard conditions.
        """
        args = expression.expressions

        if not args:
            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))

        # Build placeholder values for template
        lengths = [exp.Length(this=arg) for arg in args]
        max_len = (
            lengths[0]
            if len(lengths) == 1
            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
        )

        # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
        empty_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
                for i in range(len(args))
            ],
        )

        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
        # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
        index = exp.column("__i") + 1
        transform_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(
                    this=exp.Literal.string(f"${i + 1}"),
                    expression=exp.func("COALESCE", arg, exp.array())[index],
                )
                for i, arg in enumerate(args)
            ],
        )

        result = exp.replace_placeholders(
            self.ARRAYS_ZIP_TEMPLATE.copy(),
            # Any NULL input array makes the whole result NULL.
            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
            all_empty_check=exp.and_(
                *[
                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
                    for arg in args
                ]
            ),
            empty_struct=empty_struct,
            max_len=max_len,
            transform_struct=transform_struct,
        )
        return self.sql(result)
def lower_sql(self, expression: sqlglot.expressions.string.Lower) -> str:
3426    def lower_sql(self, expression: exp.Lower) -> str:
3427        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
3428        return _gen_with_cast_to_blob(self, expression, result_sql)
def upper_sql(self, expression: exp.Upper) -> str:
    """Render UPPER(), casting the operand to VARCHAR and restoring BLOB output when needed."""
    uppered = self.func("UPPER", _cast_to_varchar(expression.this))
    return _gen_with_cast_to_blob(self, expression, uppered)
def reverse_sql(self, expression: exp.Reverse) -> str:
    """Render REVERSE(), casting the operand to VARCHAR and restoring BLOB output when needed."""
    reversed_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
    return _gen_with_cast_to_blob(self, expression, reversed_sql)
def left_sql(self, expression: exp.Left) -> str:
    # LEFT and RIGHT share one helper; only the function name differs.
    return self._left_right_sql(expression, "LEFT")
def right_sql(self, expression: exp.Right) -> str:
    # LEFT and RIGHT share one helper; only the function name differs.
    return self._left_right_sql(expression, "RIGHT")
def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str:
    """Length of the input after stripping trailing whitespace."""
    trimmed = exp.Trim(this=expression.this, position="TRAILING")
    return self.func("LENGTH", trimmed)
def stuff_sql(self, expression: exp.Stuff) -> str:
    """Transpile STUFF/INSERT(base, start, length, insertion) to SUBSTRING concatenation.

    Builds left-part || insertion || right-part. Binary inputs are processed on
    their HEX representation (two hex chars per byte) and UNHEXed back to BLOB.
    """
    base = expression.this
    start = expression.args["start"]
    length = expression.args["length"]
    insertion = expression.expression
    is_binary = _is_binary(base)

    if is_binary:
        # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
        # (each byte = 2 hex chars), then UNHEX back to BLOB
        base = exp.Hex(this=base)
        insertion = exp.Hex(this=insertion)
        left = exp.Substring(
            this=base.copy(),
            start=exp.Literal.number(1),
            # (start - 1) bytes correspond to twice as many hex characters
            length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
        )
        right = exp.Substring(
            this=base.copy(),
            # skip (start + length - 1) bytes worth of hex, resume at the next hex char
            start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
            + exp.Literal.number(1),
        )
    else:
        left = exp.Substring(
            this=base.copy(),
            start=exp.Literal.number(1),
            length=start.copy() - exp.Literal.number(1),
        )
        right = exp.Substring(this=base.copy(), start=start + length)
    # left || insertion || right
    result: exp.Expr = exp.DPipe(
        this=exp.DPipe(this=left, expression=insertion), expression=right
    )

    if is_binary:
        # Fold the concatenated hex string back into a BLOB
        result = exp.Unhex(this=result)

    return self.sql(result)
def rand_sql(self, expression: exp.Rand) -> str:
    """Generate RANDOM(), scaling into [lower, upper) when bounds are given.

    A seed argument cannot be honored by DuckDB's RANDOM(); it is reported as
    unsupported and then ignored.
    """
    seed = expression.this
    if seed is not None:
        self.unsupported("RANDOM with seed is not supported in DuckDB")

    lower = expression.args.get("lower")
    upper = expression.args.get("upper")

    if lower and upper:
        # scale DuckDB's [0,1) to the specified range
        range_size = exp.paren(upper - lower)
        scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)

        # For now we assume that if bounds are set, return type is BIGINT. Snowflake/Teradata
        result = exp.cast(scaled, exp.DType.BIGINT)
        return self.sql(result)

    # Default DuckDB behavior - just return RANDOM() as float
    return "RANDOM()"
def bytelength_sql(self, expression: exp.ByteLength) -> str:
    """Generate OCTET_LENGTH, encoding text inputs into bytes first."""
    arg = expression.this

    # Binary or unannotated input: OCTET_LENGTH accepts it directly (conservative default)
    if not arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("OCTET_LENGTH", arg)

    # Text input (literal or annotated): measure its encoded byte representation
    return self.func("OCTET_LENGTH", exp.Encode(this=arg))
def base64encode_sql(self, expression: exp.Base64Encode) -> str:
    """Transpile BASE64_ENCODE to TO_BASE64, honoring alphabet and max_line_length args."""
    # DuckDB TO_BASE64 requires BLOB input
    # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
    # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
    result = expression.this

    # Check if input is a string type - ENCODE only accepts VARCHAR
    if result.is_type(*exp.DataType.TEXT_TYPES):
        result = exp.Encode(this=result)

    result = exp.ToBase64(this=result)

    max_line_length = expression.args.get("max_line_length")
    alphabet = expression.args.get("alphabet")

    # Handle custom alphabet by replacing standard chars with custom ones
    result = _apply_base64_alphabet_replacements(result, alphabet)

    # Handle max_line_length by inserting newlines every N characters
    # Only constant integer literals can be honored; anything else disables wrapping
    line_length = (
        t.cast(int, max_line_length.to_py())
        if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
        else 0
    )
    if line_length > 0:
        newline = exp.Chr(expressions=[exp.Literal.number(10)])
        # REGEXP_REPLACE appends '\n' after every N-char group; TRIM drops the
        # trailing newline the replacement adds after the final group
        result = exp.Trim(
            this=exp.RegexpReplace(
                this=result,
                expression=exp.Literal.string(f"(.{{{line_length}}})"),
                replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
            ),
            expression=newline,
            position="TRAILING",
        )

    return self.sql(result)
def replace_sql(self, expression: exp.Replace) -> str:
    """Render REPLACE() with all three operands coerced to VARCHAR; restore BLOB output when needed."""
    operands = [
        _cast_to_varchar(expression.this),
        _cast_to_varchar(expression.expression),
        _cast_to_varchar(expression.args.get("replacement")),
    ]
    return _gen_with_cast_to_blob(self, expression, self.func("REPLACE", *operands))
def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
    """Render bitwise XOR via DuckDB's XOR function, normalizing operands in place first."""
    _prepare_binary_bitwise_args(expression)
    xor_sql = self.func("XOR", expression.this, expression.expression)
    return _gen_with_cast_to_blob(self, expression, xor_sql)
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    """OBJECT_INSERT -> STRUCT_INSERT, or STRUCT_PACK when the input struct is empty."""
    this = expression.this
    key = expression.args.get("key")
    # The key is rendered by name (unquoted identifier form) for the := syntax
    key_sql = key.name if isinstance(key, exp.Expr) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
def mapcat_sql(self, expression: exp.MapCat) -> str:
    """Render MAP_CAT by filling the precomputed MAPCAT_TEMPLATE with both maps."""
    filled = exp.replace_placeholders(
        self.MAPCAT_TEMPLATE.copy(),
        map1=expression.this,
        map2=expression.expression,
    )
    return self.sql(filled)
def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str:
    """Render key membership as ARRAY_CONTAINS(MAP_KEYS(...), ...).

    NOTE(review): MAP_KEYS is applied to args["key"] and the probed value is
    expression.this — this presumes the parser stores the map under "key" and
    the key under "this" (e.g. Snowflake's MAP_CONTAINS_KEY(key, map) argument
    order); confirm against the MapContainsKey node definition.
    """
    return self.func(
        "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this
    )
def mapdelete_sql(self, expression: exp.MapDelete) -> str:
    """Remove keys from a map: filter MAP_ENTRIES, then rebuild via MAP_FROM_ENTRIES."""
    map_arg = expression.this
    keys_to_delete = expression.expressions

    # x.key — the key field of each {key, value} entry struct
    x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))

    # Keep only entries whose key is NOT in the deletion list
    lambda_expr = exp.Lambda(
        this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
        expressions=[exp.to_identifier("x")],
    )
    result = exp.func(
        "MAP_FROM_ENTRIES",
        exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
    )
    return self.sql(result)
def mappick_sql(self, expression: exp.MapPick) -> str:
    """Keep only the requested keys: LIST_FILTER over MAP_ENTRIES, then MAP_FROM_ENTRIES.

    A single ARRAY-typed argument is treated as the list of keys to keep
    (ARRAY_CONTAINS); otherwise each argument is an individual key (IN).
    """
    map_arg = expression.this
    keys_to_pick = expression.expressions

    # x.key — the key field of each {key, value} entry struct
    x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))

    if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
        lambda_expr = exp.Lambda(
            this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
            expressions=[exp.to_identifier("x")],
        )
    else:
        lambda_expr = exp.Lambda(
            this=exp.In(this=x_dot_key, expressions=keys_to_pick),
            expressions=[exp.to_identifier("x")],
        )

    result = exp.func(
        "MAP_FROM_ENTRIES",
        exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
    )
    return self.sql(result)
def mapsize_sql(self, expression: exp.MapSize) -> str:
    # DuckDB exposes the number of map entries via CARDINALITY.
    return self.func("CARDINALITY", expression.this)
@unsupported_args("update_flag")
def mapinsert_sql(self, expression: exp.MapInsert) -> str:
    """Transpile MAP_INSERT as MAP_CONCAT(map, MAP {key: value}).

    When the map's declared type carries a value type, the inserted value is
    cast to it to avoid type conflicts. MAP_CONCAT handles both insert and
    update of an existing key, so no update_flag distinction is needed.
    """
    map_arg = expression.this
    key = expression.args.get("key")
    value = expression.args.get("value")

    map_type = map_arg.type

    if value is not None:
        if map_type and map_type.expressions and len(map_type.expressions) > 1:
            # Extract the value type from MAP(key_type, value_type)
            value_type = map_type.expressions[1]
            # Cast value to match the map's value type to avoid type conflicts
            value = exp.cast(value, value_type)
        # else: polymorphic MAP case - no type parameters available, use value as-is

    # Create a single-entry map for the new key-value pair
    new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
    new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)

    # Use MAP_CONCAT to merge the original map with the new entry
    # This automatically handles both insert and update cases
    result = exp.func("MAP_CONCAT", map_arg, new_entry)

    return self.sql(result)
def startswith_sql(self, expression: exp.StartsWith) -> str:
    """Render STARTS_WITH with both operands coerced to VARCHAR."""
    haystack = _cast_to_varchar(expression.this)
    prefix = _cast_to_varchar(expression.expression)
    return self.func("STARTS_WITH", haystack, prefix)
def space_sql(self, expression: exp.Space) -> str:
    """SPACE(n) -> REPEAT(' ', n); DuckDB's REPEAT requires a BIGINT count."""
    count = exp.cast(expression.this, exp.DType.BIGINT)
    return self.sql(exp.Repeat(this=exp.Literal.string(" "), times=count))
def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
    """Unwrap TABLE(GENERATOR(...)) into a plain table expression; defer otherwise."""
    # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
    if isinstance(expression.this, exp.Generator):
        # Preserve alias, joins, and other table-level args
        table = exp.Table(
            this=expression.this,
            alias=expression.args.get("alias"),
            joins=expression.args.get("joins"),
        )
        return self.sql(table)

    return super().tablefromrows_sql(expression)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Generate UNNEST; BigQuery-style array explosion gets max_depth => 2 in a subquery.

    Note: mutates the node in place (appends the kwarg, moves the alias) before
    delegating to the base generator.
    """
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if isinstance(alias, exp.TableAlias):
            expression.set("alias", None)
            if alias.columns:
                alias = exp.TableAlias(this=seq_get(alias.columns, 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    """Render IGNORE NULLS only for window functions that accept it; degrade otherwise.

    exp.First is rewritten to ANY_VALUE; other non-window wrappers drop the
    modifier with an unsupported warning and emit the inner expression alone.
    """
    this = expression.this

    if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    if isinstance(this, exp.First):
        this = exp.AnyValue(this=this.this)

    if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
        self.unsupported("IGNORE NULLS is not supported for non-window functions.")

    return self.sql(this)
def split_sql(self, expression: exp.Split) -> str:
    """Generate STR_SPLIT, optionally wrapped in a CASE for dialect edge cases.

    Node flags control the wrapping:
    - null_returns_null: a NULL delimiter yields NULL
    - empty_delimiter_returns_whole: '' delimiter yields [input] as a single element
    """
    base_func = exp.func("STR_SPLIT", expression.this, expression.expression)

    case_expr = exp.case().else_(base_func)
    needs_case = False

    if expression.args.get("null_returns_null"):
        case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
        needs_case = True

    if expression.args.get("empty_delimiter_returns_whole"):
        # When delimiter is empty string, return input string as single array element
        array_with_input = exp.array(expression.this)
        case_expr = case_expr.when(
            expression.expression.eq(exp.Literal.string("")), array_with_input
        )
        needs_case = True

    return self.sql(case_expr if needs_case else base_func)
def splitpart_sql(self, expression: exp.SplitPart) -> str:
    """Generate SPLIT_PART, emulating Snowflake-specific index/delimiter quirks.

    - part_index_zero_as_one: index 0 behaves like index 1
    - empty_delimiter_returns_whole: '' delimiter returns the whole string for
      index 1/-1 and '' otherwise
    Falls back to the default function rendering when delimiter/index are missing.
    """
    string_arg = expression.this
    delimiter_arg = expression.args.get("delimiter")
    part_index_arg = expression.args.get("part_index")

    if delimiter_arg and part_index_arg:
        # Handle Snowflake's "index 0 and 1 both return first element" behavior
        if expression.args.get("part_index_zero_as_one"):
            # Convert 0 to 1 for compatibility

            part_index_arg = exp.Paren(
                this=exp.case()
                .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
                .else_(part_index_arg)
            )

        # Use Anonymous to avoid recursion
        base_func_expr: exp.Expr = exp.Anonymous(
            this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
        )
        needs_case_transform = False
        case_expr = exp.case().else_(base_func_expr)

        if expression.args.get("empty_delimiter_returns_whole"):
            # When delimiter is empty string:
            # - Return whole string if part_index is 1 or -1
            # - Return empty string otherwise
            empty_case = exp.Paren(
                this=exp.case()
                .when(
                    exp.or_(
                        part_index_arg.eq(exp.Literal.number("1")),
                        part_index_arg.eq(exp.Literal.number("-1")),
                    ),
                    string_arg,
                )
                .else_(exp.Literal.string(""))
            )

            case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
            needs_case_transform = True

        """
        Output looks something like this:

        CASE
        WHEN delimiter is '' THEN
            (
                CASE
                WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
                ELSE '' END
            )
        ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
        END

        """
        return self.sql(case_expr if needs_case_transform else base_func_expr)

    return self.function_fallback_sql(expression)
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    """Render RESPECT NULLS only for window functions that accept it; otherwise drop it."""
    if not isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # Non-window wrappers: warn and emit the inner expression without the modifier
        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    # General-purpose window functions e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
    return super().respectnulls_sql(expression)
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    """Generate ARRAY_TO_STRING, emulating NULL-element handling via LIST transforms.

    - null_is_empty: NULL elements become '' (COALESCE inside a lambda); with
      null_delim_is_null also set, a NULL delimiter makes the whole result NULL
    - null: NULL elements are replaced with the given expression
    """
    null = expression.args.get("null")

    if expression.args.get("null_is_empty"):
        x = exp.to_identifier("x")
        # Map every element through COALESCE(CAST(x AS TEXT), '')
        list_transform = exp.Transform(
            this=expression.this.copy(),
            expression=exp.Lambda(
                this=exp.Coalesce(
                    this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                ),
                expressions=[x],
            ),
        )
        array_to_string = exp.ArrayToString(
            this=list_transform, expression=expression.expression
        )
        if expression.args.get("null_delim_is_null"):
            return self.sql(
                exp.case()
                .when(expression.expression.copy().is_(exp.null()), exp.null())
                .else_(array_to_string)
            )
        return self.sql(array_to_string)

    if null:
        x = exp.to_identifier("x")
        # Replace NULL elements with the provided substitute before joining
        return self.sql(
            exp.ArrayToString(
                this=exp.Transform(
                    this=expression.this,
                    expression=exp.Lambda(
                        this=exp.Coalesce(this=x, expressions=[null]),
                        expressions=[x],
                    ),
                ),
                expression=expression.expression,
            )
        )

    return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
def concatws_sql(self, expression: exp.ConcatWs) -> str:
    """CONCAT_WS; when any operand is binary, join with the || operator instead.

    NOTE(review): the binary branch indexes args[0], so it presumes at least one
    argument follows the separator — confirm upstream guarantees this.
    """
    # DuckDB-specific: handle binary types using DPipe (||) operator
    separator = seq_get(expression.expressions, 0)
    args = expression.expressions[1:]

    if any(_is_binary(arg) for arg in [separator, *args]):
        # Fold into arg1 || sep || arg2 || sep || ... pairwise
        result = args[0]
        for arg in args[1:]:
            result = exp.DPipe(
                this=exp.DPipe(this=result, expression=separator), expression=arg
            )
        return self.sql(result)

    return super().concatws_sql(expression)
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    # REGEXP_EXTRACT and REGEXP_EXTRACT_ALL share one rendering helper.
    return self._regexp_extract_sql(expression)
def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str:
    # REGEXP_EXTRACT and REGEXP_EXTRACT_ALL share one rendering helper.
    return self._regexp_extract_sql(expression)
def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
    """Emulate REGEXP_INSTR with STRING_SPLIT_REGEX / REGEXP_EXTRACT_ALL arithmetic.

    The match position is reconstructed as
    1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + position offset;
    option=1 additionally adds the matched length (i.e. returns the end position).
    NULL arguments propagate NULL; an empty pattern or too few matches yield 0.
    """
    this = expression.this
    pattern = expression.expression
    position = expression.args.get("position")
    orig_occ = expression.args.get("occurrence")
    occurrence = orig_occ or exp.Literal.number(1)
    option = expression.args.get("option")
    parameters = expression.args.get("parameters")

    # Inline supported regex flags as an inline-modifier prefix e.g. '(?im)'
    validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
    if validated_flags:
        pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

    # Handle starting position offset
    pos_offset: exp.Expr = exp.Literal.number(0)
    if position and (not position.is_int or position.to_py() > 1):
        this = exp.Substring(this=this, start=position)
        pos_offset = position - exp.Literal.number(1)

    # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
    def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
        lst = exp.Bracket(
            this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
            offset=1,
        )
        transform = exp.Anonymous(
            this="LIST_TRANSFORM",
            expressions=[
                lst,
                exp.Lambda(
                    this=exp.Length(this=exp.to_identifier("x")),
                    expressions=[exp.to_identifier("x")],
                ),
            ],
        )
        # COALESCE guards LIST_SUM returning NULL for an empty slice
        return exp.Coalesce(
            this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
            expressions=[exp.Literal.number(0)],
        )

    # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
    base_pos: exp.Expr = (
        exp.Literal.number(1)
        + sum_lengths("STRING_SPLIT_REGEX", occurrence)
        + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
        + pos_offset
    )

    # option=1: add match length for end position
    if option and option.is_int and option.to_py() == 1:
        match_at_occ = exp.Bracket(
            this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
            expressions=[occurrence],
            offset=1,
        )
        base_pos = base_pos + exp.Coalesce(
            this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
        )

    # NULL checks for all provided arguments
    # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
    null_args = [
        expression.this,
        expression.expression,
        position,
        orig_occ,
        option,
        parameters,
    ]
    null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

    matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

    return self.sql(
        exp.case()
        .when(exp.or_(*null_checks), exp.Null())
        .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
        .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
        .else_(base_pos)
    )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    """Numeric formatting via DuckDB FORMAT.

    An integer format N becomes the spec '{:,.Nf}' (thousands separators, N
    decimal places); any other format falls back with an unsupported warning.
    """
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
def autoincrementcolumnconstraint_sql(self, _) -> str:
    # DuckDB has no AUTOINCREMENT constraint: warn and emit nothing.
    self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
    return ""
def aliases_sql(self, expression: exp.Aliases) -> str:
    """Dispatch aliased POSEXPLODE to its dedicated handler; defer otherwise."""
    inner = expression.this
    if not isinstance(inner, exp.Posexplode):
        return super().aliases_sql(expression)
    return self.posexplode_sql(inner)
def posexplode_sql(self, expression: exp.Posexplode) -> str:
    """Translate POSEXPLODE to GENERATE_SUBSCRIPTS(...) - 1 plus UNNEST(...).

    Alias handling depends on the parent node: exp.Aliases supplies the column
    pair directly, while a table alias's columns are consumed and removed.
    """
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
def addmonths_sql(self, expression: exp.AddMonths) -> str:
    """
    Handles three key issues:
    1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
    2. End-of-month preservation: If input is last day of month, result is last day of result month
    3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    # Annotate lazily: type info drives the text-cast and type-preservation branches below
    this = expression.this
    if not this.type:
        this = annotate_types(this, dialect=self.dialect)

    if this.is_type(*exp.DataType.TEXT_TYPES):
        this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

    # Detect float/decimal months to apply rounding (Snowflake behavior)
    # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
    months_expr = expression.expression
    if not months_expr.type:
        months_expr = annotate_types(months_expr, dialect=self.dialect)

    # Build interval or to_months expression based on type
    # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
    interval_or_to_months = (
        exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
        if months_expr.is_type(
            exp.DType.FLOAT,
            exp.DType.DOUBLE,
            exp.DType.DECIMAL,
        )
        # Integer case: standard INTERVAL N MONTH syntax
        else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
    )

    date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

    # Apply end-of-month preservation if Snowflake flag is set
    # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
    preserve_eom = expression.args.get("preserve_end_of_month")
    result_expr = (
        exp.case()
        .when(
            exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
            exp.func("LAST_DAY", date_add_expr),
        )
        .else_(date_add_expr)
        if preserve_eom
        else date_add_expr
    )

    # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
    # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
    # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
    # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
    if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
        return self.sql(exp.Cast(this=result_expr, to=this.type))
    return self.sql(result_expr)

Handles three key issues:

  1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
  2. End-of-month preservation: If input is last day of month, result is last day of result month
  3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
def format_sql(self, expression: exp.Format) -> str:
    """Render FORMAT; a lone '%s' placeholder maps to DuckDB's '{}' template."""
    args = expression.expressions
    if len(args) == 1 and expression.name.lower() == "%s":
        return self.func("FORMAT", "'{}'", args[0])

    return self.function_fallback_sql(expression)
def hexstring_sql(
    self, expression: exp.HexString, binary_function_repr: str | None = None
) -> str:
    """Render a hex string literal, decoding binary hex via UNHEX."""
    # DuckDB's UNHEX('FF') correctly yields the blob \xFF, so force it as the decoder
    rendered = super().hexstring_sql(expression, binary_function_repr="UNHEX")
    return rendered
def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
    """Render DATE_TRUNC, special-casing week units and optional input-type preservation."""
    target = expression.this
    raw_unit = expression.args.get("unit")

    # Week units may carry a custom first day of week
    dow = _week_unit_to_dow(raw_unit)
    unit = unit_to_str(expression)

    if dow:
        truncated = self.sql(
            _build_week_trunc_expression(target, dow, preserve_start_day=True)
        )
    else:
        truncated = self.func("DATE_TRUNC", unit, target)

    # Cast back to the input type when the source dialect preserves it,
    # unless the input is already a DATE and the unit is date-grained
    preserve_type = (
        expression.args.get("input_type_preserved")
        and target.is_type(*exp.DataType.TEMPORAL_TYPES)
        and not (is_date_unit(unit) and target.is_type(exp.DType.DATE))
    )
    if preserve_type:
        return self.sql(exp.Cast(this=truncated, to=target.type))

    return truncated
def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
    """Render TIMESTAMP_TRUNC as DATE_TRUNC, handling time zones and type preservation."""
    unit = unit_to_str(expression)
    tz = expression.args.get("zone")
    ts = expression.this
    truncates_to_date = is_date_unit(unit)

    if truncates_to_date and tz:
        # BigQuery's TIMESTAMP_TRUNC with a timezone truncates in the target
        # timezone and returns the value as UTC. Two AT TIME ZONE wrappers are
        # needed: the inner one shifts into the target zone before truncation,
        # the outer one converts the DATE result back to TIMESTAMPTZ
        # (preserving the time component).
        shifted = exp.AtTimeZone(this=ts, zone=tz)
        truncated = self.func("DATE_TRUNC", unit, shifted)
        return self.sql(exp.AtTimeZone(this=truncated, zone=tz))

    truncated = self.func("DATE_TRUNC", unit, ts)
    if not expression.args.get("input_type_preserved"):
        return truncated

    if ts.type and ts.is_type(exp.DType.TIME, exp.DType.TIMETZ):
        # DATE_TRUNC needs a date component for TIME inputs: anchor the time to
        # a fixed date, truncate, then cast back to the original TIME type.
        anchor = exp.Cast(
            this=exp.Literal.string("1970-01-01"),
            to=exp.DataType(this=exp.DType.DATE),
        )
        anchored = exp.Add(this=anchor, expression=ts)
        truncated = self.func("DATE_TRUNC", unit, anchored)
        return self.sql(exp.Cast(this=truncated, to=ts.type))

    if ts.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
        truncates_to_date and ts.is_type(exp.DType.DATE)
    ):
        return self.sql(exp.Cast(this=truncated, to=ts.type))

    return truncated
def trim_sql(self, expression: exp.Trim) -> str:
    """Render TRIM, coercing operands to VARCHAR and casting back to BLOB if needed."""
    expression.this.replace(_cast_to_varchar(expression.this))

    trim_chars = expression.expression
    if trim_chars:
        trim_chars.replace(_cast_to_varchar(trim_chars))

    rendered = super().trim_sql(expression)
    return _gen_with_cast_to_blob(self, expression, rendered)
def round_sql(self, expression: exp.Round) -> str:
    """Render ROUND, mapping banker's-rounding modes onto DuckDB's ROUND_EVEN."""
    value = expression.this
    scale = expression.args.get("decimals")
    mode = expression.args.get("truncate")

    # DuckDB insists on an integral scale argument, while dialects such as
    # Snowflake accept non-integer scales and coerce them internally.
    if scale is not None and expression.args.get("casts_non_integer_decimals"):
        if not (scale.is_int or scale.is_type(*exp.DataType.INTEGER_TYPES)):
            scale = exp.cast(scale, exp.DType.INT)

    name = "ROUND"
    if mode:
        if mode.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
            # BigQuery's ROUND_HALF_EVEN / Snowflake's HALF_TO_EVEN -> ROUND_EVEN
            name = "ROUND_EVEN"
            mode = None
        elif mode.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
            # BigQuery's ROUND_HALF_AWAY_FROM_ZERO / Snowflake's
            # HALF_AWAY_FROM_ZERO is DuckDB ROUND's default behavior
            mode = None

    return self.func(name, value, scale, mode)
def strtok_sql(self, expression: exp.Strtok) -> str:
    """Transpile STRTOK via REGEXP_SPLIT_TO_ARRAY + LIST_FILTER with 1-based indexing."""
    source = expression.this
    delimiter = expression.args.get("delimiter")
    part_index = expression.args.get("part_index")

    if not (delimiter and part_index):
        return self.function_fallback_sql(expression)

    # Escape regex metacharacters inside the delimiter at runtime so it can be
    # embedded in a character class.
    escaped = exp.Anonymous(
        this="REGEXP_REPLACE",
        expressions=[
            delimiter,
            # Problematic regex chars to escape
            exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
            # Prefix each match with a backslash via the \1 backreference
            exp.Literal.string(r"\\\1"),
            # Apply globally
            exp.Literal.string("g"),
        ],
    )

    # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped, ']') END
    pattern = (
        exp.case()
        .when(delimiter.eq(exp.Literal.string("")), exp.Literal.string(""))
        .else_(
            exp.func(
                "CONCAT",
                exp.Literal.string("["),
                escaped,
                exp.Literal.string("]"),
            )
        )
    )

    # STRTOK skips empty tokens, so filter them out before indexing:
    # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
    tokens = exp.func("REGEXP_SPLIT_TO_ARRAY", source, pattern)
    var = exp.to_identifier("x")
    empty_check = var.eq(exp.Literal.string(""))
    non_empty = exp.func(
        "LIST_FILTER",
        tokens,
        exp.Lambda(this=exp.not_(empty_check.copy()), expressions=[var.copy()]),
    )
    indexed = exp.Bracket(
        this=non_empty,
        expressions=[part_index],
        offset=1,
    )

    # Splice everything into the class-level template
    filled = exp.replace_placeholders(
        self.STRTOK_TEMPLATE.copy(),
        string=source,
        delimiter=delimiter,
        part_index=part_index,
        base_func=indexed,
    )

    return self.sql(filled)
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """Render APPROX_QUANTILE, widening the result to DOUBLE when a real type is expected."""
    rendered = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))

    # DuckDB returns integers from APPROX_QUANTILE, so cast when the caller expects a real
    if expression.is_type(*exp.DataType.REAL_TYPES):
        return f"CAST({rendered} AS DOUBLE)"

    return rendered
def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
    """
    Transpile BigQuery's APPROX_QUANTILES(expr, n), which returns an array of n + 1
    approximate quantile values dividing the input distribution into n equal-sized buckets.

    Both BigQuery and DuckDB estimate quantiles approximately, but BigQuery does not
    document the specific algorithm it uses, so results may differ. DuckDB does not
    support RESPECT NULLS.
    """
    this = expression.this

    if isinstance(this, exp.Distinct):
        # With DISTINCT, the parser attaches both arguments to the Distinct node
        if len(this.expressions) < 2:
            self.unsupported("APPROX_QUANTILES requires a bucket count argument")
            return self.function_fallback_sql(expression)
        bucket_expr = this.expressions[1].pop()
    else:
        bucket_expr = expression.expression

    if not (isinstance(bucket_expr, exp.Literal) and bucket_expr.is_int):
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    buckets = t.cast(int, bucket_expr.to_py())
    if buckets <= 0:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    # Evenly spaced fractions 0, 1/n, 2/n, ..., 1 — Decimal keeps them exact
    fractions = exp.Array(
        expressions=[
            exp.Literal.number(Decimal(i) / Decimal(buckets)) for i in range(buckets + 1)
        ]
    )

    return self.sql(exp.ApproxQuantile(this=this, quantile=fractions))

BigQuery's APPROX_QUANTILES(expr, n) returns an array of n + 1 approximate quantile values dividing the input distribution into n equal-sized buckets.

Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery does not document the specific algorithm it uses, so results may differ. DuckDB does not support RESPECT NULLS.

def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
    """Render JSON scalar extraction; scalar_only routes through JSON_VALUE first."""
    if expression.args.get("scalar_only"):
        json_value = rename_func("JSON_VALUE")(self, expression)
        expression = exp.JSONExtractScalar(this=json_value, expression="'$'")

    return _arrow_json_extract_sql(self, expression)
def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
    """Render bitwise NOT, casting binary operands to BIT and the result back to BLOB."""
    operand = expression.this

    if _is_binary(operand):
        # Tag the result as binary so the trailing BLOB cast kicks in
        expression.type = exp.DType.BINARY.into_expr()

    target = _cast_to_bit(operand)

    if isinstance(operand, exp.Neg):
        # Parenthesize a negation so ~ binds to the whole operand
        target = exp.Paren(this=target)

    expression.set("this", target)

    rendered = f"~{self.sql(expression, 'this')}"

    return _gen_with_cast_to_blob(self, expression, rendered)
def window_sql(self, expression: exp.Window) -> str:
    """Route windowed CORR (optionally wrapped in a FILTER) through _corr_sql."""
    inner = expression.this
    corr_window = isinstance(inner, exp.Corr) or (
        isinstance(inner, exp.Filter) and isinstance(inner.this, exp.Corr)
    )
    if corr_window:
        return self._corr_sql(expression)

    return super().window_sql(expression)
def filter_sql(self, expression: exp.Filter) -> str:
    """Route FILTER over CORR through _corr_sql; defer everything else to the base class."""
    if isinstance(expression.this, exp.Corr):
        return self._corr_sql(expression)

    return super().filter_sql(expression)
Inherited Members
sqlglot.generator.Generator
Generator
NULL_ORDERING_SUPPORTED
WINDOW_FUNCS_WITH_NULL_ORDERING
LOCKING_READS_SUPPORTED
EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
WRAP_DERIVED_VALUES
CREATE_FUNCTION_RETURN_AS
MATCHED_BY_SOURCE
SUPPORTS_MERGE_WHERE
SINGLE_STRING_INTERVAL
INTERVAL_ALLOWS_PLURAL_FORM
LIMIT_ONLY_LITERALS
GROUPINGS_SEP
INDEX_ON
INOUT_SEPARATOR
DIRECTED_JOINS
QUERY_HINT_SEP
IS_BOOL_ALLOWED
DUPLICATE_KEY_UPDATE_WITH_SET
LIMIT_IS_TOP
RETURNING_END
EXTRACT_ALLOWS_QUOTES
TZ_TO_WITH_TIME_ZONE
VALUES_AS_TABLE
ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
UNNEST_WITH_ORDINALITY
AGGREGATE_FILTER_SUPPORTED
COMPUTED_COLUMN_WITH_TYPE
SUPPORTS_TABLE_COPY
TABLESAMPLE_REQUIRES_PARENS
TABLESAMPLE_SIZE_IS_ROWS
TABLESAMPLE_WITH_METHOD
COLLATE_IS_FUNC
DATA_TYPE_SPECIFIERS_ALLOWED
ENSURE_BOOLS
CTE_RECURSIVE_KEYWORD_REQUIRED
SUPPORTS_SINGLE_ARG_CONCAT
SUPPORTS_TABLE_ALIAS_COLUMNS
UNPIVOT_ALIASES_ARE_IDENTIFIERS
INSERT_OVERWRITE
SUPPORTS_SELECT_INTO
SUPPORTS_UNLOGGED_TABLES
LIKE_PROPERTY_INSIDE_SCHEMA
JSON_TYPE_REQUIRED_FOR_EXTRACTION
JSON_PATH_SINGLE_QUOTE_ESCAPE
SET_OP_MODIFIERS
COPY_PARAMS_ARE_WRAPPED
COPY_PARAMS_EQ_REQUIRED
TRY_SUPPORTED
SUPPORTS_UESCAPE
UNICODE_SUBSTITUTE
HEX_FUNC
WITH_PROPERTIES_PREFIX
QUOTE_JSON_PATH
SUPPORTS_EXPLODING_PROJECTIONS
ARRAY_CONCAT_IS_VAR_LEN
SUPPORTS_CONVERT_TIMEZONE
SUPPORTS_MEDIAN
SUPPORTS_UNIX_SECONDS
ALTER_SET_WRAPPED
PARSE_JSON_NAME
ARRAY_SIZE_NAME
ALTER_SET_TYPE
SUPPORTS_BETWEEN_FLAGS
MATCH_AGAINST_TABLE_PREFIX
DECLARE_DEFAULT_ASSIGNMENT
UPDATE_STATEMENT_SUPPORTS_FROM
STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
UNSUPPORTED_TYPES
TIME_PART_SINGULARS
TOKEN_MAPPING
EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
WITH_SEPARATED_COMMENTS
EXCLUDE_COMMENTS
PARAMETERIZABLE_TEXT_TYPES
EXPRESSIONS_WITHOUT_NESTED_CTES
RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
SAFE_JSON_PATH_KEY_RE
SENTINEL_LINE_BREAK
pretty
identify
normalize
pad
unsupported_level
max_unsupported
leading_comma
max_text_width
comments
dialect
normalize_functions
unsupported_messages
generate
preprocess
unsupported
sep
seg
sanitize_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_parts
column_sql
pseudocolumn_sql
columnposition_sql
columndef_sql
columnconstraint_sql
computedcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
generatedasrowcolumnconstraint_sql
periodforsystemtimeconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
inoutcolumnconstraint_sql
createable_sql
create_sql
sequenceproperties_sql
triggerproperties_sql
triggerreferencing_sql
triggerevent_sql
clone_sql
describe_sql
heredoc_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
bytestring_sql
unicodestring_sql
rawstring_sql
datatypeparam_sql
datatype_sql
directory_sql
delete_sql
drop_sql
set_operation
set_operations
fetch_sql
limitoptions_sql
hint_sql
indexparameters_sql
index_sql
identifier_sql
hex_sql
lowerhex_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_name
property_sql
uuidproperty_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
moduleproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
partitionboundspec_sql
partitionedofproperty_sql
lockingproperty_sql
withdataproperty_sql
withsystemversioningproperty_sql
insert_sql
introducer_sql
kill_sql
pseudotype_sql
objectidentifier_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
withtablehint_sql
indextablehint_sql
historicaldata_sql
table_parts
table_sql
pivot_sql
version_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
groupingsets_sql
rollup_sql
rollupindex_sql
rollupproperty_sql
cube_sql
group_sql
having_sql
connect_sql
prior_sql
lateral_op
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
queryband_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
booland_sql
boolor_sql
order_sql
withfill_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognizemeasure_sql
matchrecognize_sql
query_modifiers
options_modifier
for_modifiers
queryoption_sql
offset_limit_modifiers
after_limit_modifiers
select_sql
schema_sql
schema_columns_sql
star_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
prewhere_sql
where_sql
partition_by_sql
windowspec_sql
between_sql
bracket_offset_expressions
all_sql
any_sql
exists_sql
case_sql
constraint_sql
nextvaluefor_sql
convert_concat_args
concat_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonpath_sql
json_path_part
formatjson_sql
formatphrase_sql
jsonarray_sql
jsonarrayagg_sql
jsoncolumndef_sql
jsonschema_sql
jsontable_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
pivotalias_sql
atindex_sql
attimezone_sql
fromtimezone_sql
add_sql
and_sql
or_sql
xor_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwiseor_sql
bitwiserightshift_sql
cast_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
alterindex_sql
alterdiststyle_sql
altersortkey_sql
alterrename_sql
renamecolumn_sql
alterset_sql
alter_sql
altersession_sql
add_column_sql
droppartition_sql
addconstraint_sql
addpartition_sql
distinct_sql
havingmax_sql
intdiv_sql
dpipe_sql
div_sql
safedivide_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
propertyeq_sql
escape_sql
glob_sql
gt_sql
gte_sql
is_sql
like_sql
ilike_sql
match_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
sub_sql
trycast_sql
jsoncast_sql
try_sql
log_sql
use_sql
binary
ceil_floor
function_fallback_sql
func
format_args
too_wide
format_time
expressions
op_expressions
naked_property
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
whens_sql
merge_sql
tochar_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
duplicatekeyproperty_sql
uniquekeyproperty_sql
distributedbyproperty_sql
oncluster_sql
clusteredbyproperty_sql
anyvalue_sql
querytransform_sql
indexconstraintoption_sql
checkcolumnconstraint_sql
indexcolumnconstraint_sql
nvl2_sql
comprehension_sql
columnprefix_sql
opclass_sql
predict_sql
generateembedding_sql
generatetext_sql
generatetable_sql
generatebool_sql
generateint_sql
generatedouble_sql
mltranslate_sql
mlforecast_sql
aiforecast_sql
featuresattime_sql
vectorsearch_sql
forin_sql
refresh_sql
toarray_sql
tsordstotimestamp_sql
tsordstodatetime_sql
tsordstodate_sql
unixdate_sql
lastday_sql
dateadd_sql
arrayany_sql
struct_sql
partitionrange_sql
truncatetable_sql
convert_sql
copyparameter_sql
credentials_sql
copy_sql
semicolon_sql
datadeletionproperty_sql
maskingpolicycolumnconstraint_sql
gapfill_sql
scope_resolution
scoperesolution_sql
changes_sql
summarize_sql
explodinggenerateseries_sql
converttimezone_sql
json_sql
jsonvalue_sql
skipjsoncolumn_sql
conditionalinsert_sql
multitableinserts_sql
oncondition_sql
jsonextractquote_sql
jsonexists_sql
arrayagg_sql
slice_sql
apply_sql
grant_sql
revoke_sql
grantprivilege_sql
grantprincipal_sql
columns_sql
overlay_sql
todouble_sql
string_sql
median_sql
overflowtruncatebehavior_sql
unixseconds_sql
arraysize_sql
attach_sql
detach_sql
attachoption_sql
watermarkcolumnconstraint_sql
encodeproperty_sql
includeproperty_sql
xmlelement_sql
xmlkeyvalueoption_sql
partitionbyrangeproperty_sql
partitionbyrangepropertydynamic_sql
unpivotcolumns_sql
analyzesample_sql
analyzestatistics_sql
analyzehistogram_sql
analyzedelete_sql
analyzelistchainedrows_sql
analyzevalidate_sql
analyze_sql
xmltable_sql
xmlnamespace_sql
export_sql
declare_sql
declareitem_sql
recursivewithsearch_sql
parameterizedagg_sql
anonymousaggfunc_sql
combinedaggfunc_sql
combinedparameterizedagg_sql
get_put_sql
translatecharacters_sql
decodecase_sql
semanticview_sql
getextract_sql
datefromunixdate_sql
buildproperty_sql
refreshtriggerproperty_sql
modelattribute_sql
directorystage_sql
uuid_sql
initcap_sql
localtime_sql
localtimestamp_sql
weekstart_sql
block_sql
storedprocedure_sql
ifblock_sql
whileblock_sql
execute_sql
executesql_sql
altermodifysqlsecurity_sql
usingproperty_sql
renameindex_sql