# sqlglot.dialects.duckdb
1from __future__ import annotations 2 3from decimal import Decimal 4from itertools import groupby 5import re 6import typing as t 7 8from sqlglot import exp, generator, parser, tokens, transforms 9 10from sqlglot.dialects.dialect import ( 11 DATETIME_DELTA, 12 Dialect, 13 JSON_EXTRACT_TYPE, 14 NormalizationStrategy, 15 approx_count_distinct_sql, 16 arrow_json_extract_sql, 17 binary_from_function, 18 bool_xor_sql, 19 build_default_decimal_type, 20 build_formatted_time, 21 build_regexp_extract, 22 count_if_to_sum, 23 date_delta_to_binary_interval_op, 24 date_trunc_to_time, 25 datestrtodate_sql, 26 encode_decode_sql, 27 explode_to_unnest_sql, 28 getbit_sql, 29 groupconcat_sql, 30 inline_array_unless_query, 31 months_between_sql, 32 no_datetime_sql, 33 no_comment_column_constraint_sql, 34 no_make_interval_sql, 35 no_time_sql, 36 no_timestamp_sql, 37 pivot_column_names, 38 regexp_replace_global_modifier, 39 rename_func, 40 remove_from_array_using_filter, 41 sha2_digest_sql, 42 sha256_sql, 43 strposition_sql, 44 str_to_time_sql, 45 timestrtotime_sql, 46 unit_to_str, 47) 48from sqlglot.generator import unsupported_args 49from sqlglot.helper import is_date_unit, seq_get 50from sqlglot.tokens import TokenType 51from sqlglot.parser import binary_range_parser 52 53# Regex to detect time zones in timestamps of the form [+|-]TT[:tt] 54# The pattern matches timezone offsets that appear after the time portion 55TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?") 56 57# Characters that must be escaped when building regex expressions in INITCAP 58REGEX_ESCAPE_REPLACEMENTS = { 59 "\\": "\\\\", 60 "-": r"\-", 61 "^": r"\^", 62 "[": r"\[", 63 "]": r"\]", 64} 65 66# Used to in RANDSTR transpilation 67RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 68RANDSTR_SEED = 123456 69 70# Whitespace control characters that DuckDB must process with `CHR({val})` calls 71WS_CONTROL_CHARS_TO_DUCK = { 72 "\u000b": 11, 73 "\u001c": 28, 74 "\u001d": 29, 
75 "\u001e": 30, 76 "\u001f": 31, 77} 78 79# Days of week to ISO 8601 day-of-week numbers 80# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7 81WEEK_START_DAY_TO_DOW = { 82 "MONDAY": 1, 83 "TUESDAY": 2, 84 "WEDNESDAY": 3, 85 "THURSDAY": 4, 86 "FRIDAY": 5, 87 "SATURDAY": 6, 88 "SUNDAY": 7, 89} 90 91MAX_BIT_POSITION = exp.Literal.number(32768) 92 93 94def _last_day_sql(self: DuckDB.Generator, expression: exp.LastDay) -> str: 95 """ 96 DuckDB's LAST_DAY only supports finding the last day of a month. 97 For other date parts (year, quarter, week), we need to implement equivalent logic. 98 """ 99 date_expr = expression.this 100 unit = expression.text("unit") 101 102 if not unit or unit.upper() == "MONTH": 103 # Default behavior - use DuckDB's native LAST_DAY 104 return self.func("LAST_DAY", date_expr) 105 106 if unit.upper() == "YEAR": 107 # Last day of year: December 31st of the same year 108 year_expr = exp.func("EXTRACT", "YEAR", date_expr) 109 make_date_expr = exp.func( 110 "MAKE_DATE", year_expr, exp.Literal.number(12), exp.Literal.number(31) 111 ) 112 return self.sql(make_date_expr) 113 114 if unit.upper() == "QUARTER": 115 # Last day of quarter 116 year_expr = exp.func("EXTRACT", "YEAR", date_expr) 117 quarter_expr = exp.func("EXTRACT", "QUARTER", date_expr) 118 119 # Calculate last month of quarter: quarter * 3. 
Quarter can be 1 to 4 120 last_month_expr = exp.Mul(this=quarter_expr, expression=exp.Literal.number(3)) 121 first_day_last_month_expr = exp.func( 122 "MAKE_DATE", year_expr, last_month_expr, exp.Literal.number(1) 123 ) 124 125 # Last day of the last month of the quarter 126 last_day_expr = exp.func("LAST_DAY", first_day_last_month_expr) 127 return self.sql(last_day_expr) 128 129 if unit.upper() == "WEEK": 130 # DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6 131 dow = exp.func("EXTRACT", "DAYOFWEEK", date_expr) 132 # Days to the last day of week: (7 - dayofweek) % 7, assuming the last day of week is Sunday (Snowflake) 133 # Wrap in parentheses to ensure correct precedence 134 days_to_sunday_expr = exp.Mod( 135 this=exp.Paren(this=exp.Sub(this=exp.Literal.number(7), expression=dow)), 136 expression=exp.Literal.number(7), 137 ) 138 interval_expr = exp.Interval(this=days_to_sunday_expr, unit=exp.var("DAY")) 139 add_expr = exp.Add(this=date_expr, expression=interval_expr) 140 cast_expr = exp.cast(add_expr, exp.DataType.Type.DATE) 141 return self.sql(cast_expr) 142 143 self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function") 144 return self.function_fallback_sql(expression) 145 146 147def _is_nanosecond_unit(unit: t.Optional[exp.Expression]) -> bool: 148 return isinstance(unit, (exp.Var, exp.Literal)) and unit.name.upper() == "NANOSECOND" 149 150 151def _handle_nanosecond_diff( 152 self: DuckDB.Generator, 153 end_time: exp.Expression, 154 start_time: exp.Expression, 155) -> str: 156 """Generate NANOSECOND diff using EPOCH_NS since DATE_DIFF doesn't support it.""" 157 end_ns = exp.cast(end_time, exp.DataType.Type.TIMESTAMP_NS) 158 start_ns = exp.cast(start_time, exp.DataType.Type.TIMESTAMP_NS) 159 160 # Build expression tree: EPOCH_NS(end) - EPOCH_NS(start) 161 return self.sql( 162 exp.Sub(this=exp.func("EPOCH_NS", end_ns), expression=exp.func("EPOCH_NS", start_ns)) 163 ) 164 165 166def _to_boolean_sql(self: DuckDB.Generator, expression: 
exp.ToBoolean) -> str: 167 """ 168 Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent. 169 170 DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'. 171 We need to handle the 'on'/'off' cases explicitly. 172 173 For TO_BOOLEAN (safe=False): NaN and INF values cause errors. We use DuckDB's native ERROR() 174 function to replicate this behavior with a clear error message. 175 176 For TRY_TO_BOOLEAN (safe=True): Use DuckDB's TRY_CAST for conversion, which returns NULL 177 for invalid inputs instead of throwing errors. 178 """ 179 arg = expression.this 180 is_safe = expression.args.get("safe", False) 181 182 base_case_expr = ( 183 exp.case() 184 .when( 185 # Handle 'on' -> TRUE (case insensitive) 186 exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("ON")), 187 exp.true(), 188 ) 189 .when( 190 # Handle 'off' -> FALSE (case insensitive) 191 exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("OFF")), 192 exp.false(), 193 ) 194 ) 195 196 if is_safe: 197 # TRY_TO_BOOLEAN: handle 'on'/'off' and use TRY_CAST for everything else 198 case_expr = base_case_expr.else_(exp.func("TRY_CAST", arg, exp.DataType.build("BOOLEAN"))) 199 else: 200 # TO_BOOLEAN: handle NaN/INF errors, 'on'/'off', and use regular CAST 201 cast_to_real = exp.func("TRY_CAST", arg, exp.DataType.build("REAL")) 202 203 # Check for NaN and INF values 204 nan_inf_check = exp.Or( 205 this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real) 206 ) 207 208 case_expr = base_case_expr.when( 209 nan_inf_check, 210 exp.func( 211 "ERROR", 212 exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"), 213 ), 214 ).else_(exp.cast(arg, exp.DataType.Type.BOOLEAN)) 215 216 return self.sql(case_expr) 217 218 219# BigQuery -> DuckDB conversion for the DATE function 220def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str: 221 this = 
expression.this 222 zone = self.sql(expression, "zone") 223 224 if zone: 225 # BigQuery considers "this" at UTC, converts it to the specified 226 # time zone and then keeps only the DATE part 227 # To micmic that, we: 228 # (1) Cast to TIMESTAMP to remove DuckDB's local tz 229 # (2) Apply consecutive AtTimeZone calls for UTC -> zone conversion 230 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 231 at_utc = exp.AtTimeZone(this=this, zone=exp.Literal.string("UTC")) 232 this = exp.AtTimeZone(this=at_utc, zone=zone) 233 234 return self.sql(exp.cast(expression=this, to=exp.DataType.Type.DATE)) 235 236 237# BigQuery -> DuckDB conversion for the TIME_DIFF function 238def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: 239 unit = expression.unit 240 241 if _is_nanosecond_unit(unit): 242 return _handle_nanosecond_diff(self, expression.expression, expression.this) 243 244 this = exp.cast(expression.this, exp.DataType.Type.TIME) 245 expr = exp.cast(expression.expression, exp.DataType.Type.TIME) 246 247 # Although the 2 dialects share similar signatures, BQ seems to inverse 248 # the sign of the result so the start/end time operands are flipped 249 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 250 251 252def _date_delta_to_binary_interval_op( 253 cast: bool = True, 254) -> t.Callable[[DuckDB.Generator, DATETIME_DELTA], str]: 255 """ 256 DuckDB override to handle: 257 1. NANOSECOND operations (DuckDB doesn't support INTERVAL ... NANOSECOND) 258 2. Float/decimal interval values (DuckDB INTERVAL requires integers) 259 """ 260 base_impl = date_delta_to_binary_interval_op(cast=cast) 261 262 def _duckdb_date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str: 263 unit = expression.unit 264 interval_value = expression.expression 265 266 # Handle NANOSECOND unit (DuckDB doesn't support INTERVAL ... 
NANOSECOND) 267 if _is_nanosecond_unit(unit): 268 if isinstance(interval_value, exp.Interval): 269 interval_value = interval_value.this 270 271 timestamp_ns = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP_NS) 272 273 return self.sql( 274 exp.func( 275 "MAKE_TIMESTAMP_NS", 276 exp.Add(this=exp.func("EPOCH_NS", timestamp_ns), expression=interval_value), 277 ) 278 ) 279 280 # Handle float/decimal interval values as duckDB INTERVAL requires integer expressions 281 if not interval_value or isinstance(interval_value, exp.Interval): 282 return base_impl(self, expression) 283 284 if interval_value.is_type(*exp.DataType.REAL_TYPES): 285 expression.set("expression", exp.cast(exp.func("ROUND", interval_value), "INT")) 286 287 return base_impl(self, expression) 288 289 return _duckdb_date_delta_sql 290 291 292@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator.")) 293def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str: 294 return self.func("ARRAY_SORT", expression.this) 295 296 297def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str: 298 name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT" 299 return self.func(name, expression.this) 300 301 302def _build_sort_array_desc(args: t.List) -> exp.Expression: 303 return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) 304 305 306def _build_array_prepend(args: t.List) -> exp.Expression: 307 return exp.ArrayPrepend(this=seq_get(args, 1), expression=seq_get(args, 0)) 308 309 310def _build_date_diff(args: t.List) -> exp.Expression: 311 return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 312 313 314def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]: 315 def _builder(args: t.List) -> exp.GenerateSeries: 316 # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions 317 if len(args) == 1: 318 # DuckDB 
uses 0 as a default for the series' start when it's omitted 319 args.insert(0, exp.Literal.number("0")) 320 321 gen_series = exp.GenerateSeries.from_arg_list(args) 322 gen_series.set("is_end_exclusive", end_exclusive) 323 324 return gen_series 325 326 return _builder 327 328 329def _build_make_timestamp(args: t.List) -> exp.Expression: 330 if len(args) == 1: 331 return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS) 332 333 return exp.TimestampFromParts( 334 year=seq_get(args, 0), 335 month=seq_get(args, 1), 336 day=seq_get(args, 2), 337 hour=seq_get(args, 3), 338 min=seq_get(args, 4), 339 sec=seq_get(args, 5), 340 ) 341 342 343def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]: 344 def _parse(self: DuckDB.Parser) -> exp.Show: 345 return self._parse_show_duckdb(*args, **kwargs) 346 347 return _parse 348 349 350def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: 351 args: t.List[str] = [] 352 353 # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is 354 # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB 355 # The transformation to ROW will take place if: 356 # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would 357 # 2. 
A cast to STRUCT / ARRAY of STRUCTs is found 358 ancestor_cast = expression.find_ancestor(exp.Cast) 359 is_bq_inline_struct = ( 360 (expression.find(exp.PropertyEQ) is None) 361 and ancestor_cast 362 and any( 363 casted_type.is_type(exp.DataType.Type.STRUCT) 364 for casted_type in ancestor_cast.find_all(exp.DataType) 365 ) 366 ) 367 368 for i, expr in enumerate(expression.expressions): 369 is_property_eq = isinstance(expr, exp.PropertyEQ) 370 value = expr.expression if is_property_eq else expr 371 372 if is_bq_inline_struct: 373 args.append(self.sql(value)) 374 else: 375 if is_property_eq: 376 if isinstance(expr.this, exp.Identifier): 377 key = self.sql(exp.Literal.string(expr.name)) 378 else: 379 key = self.sql(expr.this) 380 else: 381 key = self.sql(exp.Literal.string(f"_{i}")) 382 383 args.append(f"{key}: {self.sql(value)}") 384 385 csv_args = ", ".join(args) 386 387 return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}" 388 389 390def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str: 391 if expression.is_type("array"): 392 return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]" 393 394 # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE 395 if expression.is_type( 396 exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ 397 ): 398 return expression.this.value 399 400 return self.datatype_sql(expression) 401 402 403def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str: 404 sql = self.func("TO_JSON", expression.this, expression.args.get("options")) 405 return f"CAST({sql} AS TEXT)" 406 407 408def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str: 409 scale = expression.args.get("scale") 410 timestamp = expression.this 411 target_type = expression.args.get("target_type") 412 413 # Check if we need NTZ (naive timestamp in UTC) 414 is_ntz = target_type and target_type.this in 
( 415 exp.DataType.Type.TIMESTAMP, 416 exp.DataType.Type.TIMESTAMPNTZ, 417 ) 418 419 if scale == exp.UnixToTime.MILLIS: 420 # EPOCH_MS already returns TIMESTAMP (naive, UTC) 421 return self.func("EPOCH_MS", timestamp) 422 if scale == exp.UnixToTime.MICROS: 423 # MAKE_TIMESTAMP already returns TIMESTAMP (naive, UTC) 424 return self.func("MAKE_TIMESTAMP", timestamp) 425 426 # Other scales: divide and use TO_TIMESTAMP 427 if scale not in (None, exp.UnixToTime.SECONDS): 428 timestamp = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)) 429 430 to_timestamp: exp.Expression = exp.Anonymous(this="TO_TIMESTAMP", expressions=[timestamp]) 431 432 if is_ntz: 433 to_timestamp = exp.AtTimeZone(this=to_timestamp, zone=exp.Literal.string("UTC")) 434 435 return self.sql(to_timestamp) 436 437 438WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In, exp.Not) 439 440 441def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 442 arrow_sql = arrow_json_extract_sql(self, expression) 443 if not expression.same_parent and isinstance( 444 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 445 ): 446 arrow_sql = self.wrap(arrow_sql) 447 return arrow_sql 448 449 450def _implicit_datetime_cast( 451 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 452) -> t.Optional[exp.Expression]: 453 if isinstance(arg, exp.Literal) and arg.is_string: 454 ts = arg.name 455 if type == exp.DataType.Type.DATE and ":" in ts: 456 type = ( 457 exp.DataType.Type.TIMESTAMPTZ 458 if TIMEZONE_PATTERN.search(ts) 459 else exp.DataType.Type.TIMESTAMP 460 ) 461 462 arg = exp.cast(arg, type) 463 464 return arg 465 466 467def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]: 468 """ 469 Compute the Monday-based day shift to align DATE_DIFF('WEEK', ...) coming 470 from other dialects, e.g BigQuery's WEEK(<day>) or ISOWEEK unit parts. 
471 472 Args: 473 unit: The unit expression (Var for ISOWEEK or WeekStart) 474 475 Returns: 476 The ISO 8601 day number (Monday=1, Sunday=7 etc) or None if not a week unit or if day is dynamic (not a constant). 477 478 Examples: 479 "WEEK(SUNDAY)" -> 7 480 "WEEK(MONDAY)" -> 1 481 "ISOWEEK" -> 1 482 """ 483 # Handle plain Var expressions for ISOWEEK only 484 if isinstance(unit, exp.Var) and unit.name.upper() in "ISOWEEK": 485 return 1 486 487 # Handle WeekStart expressions with explicit day 488 if isinstance(unit, exp.WeekStart): 489 return WEEK_START_DAY_TO_DOW.get(unit.name.upper()) 490 491 return None 492 493 494def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression: 495 """ 496 Build DATE_TRUNC expression for week boundaries with custom start day. 497 498 Args: 499 date_expr: The date expression to truncate 500 shift_days: ISO 8601 day-of-week number (Monday=0, ..., Sunday=6) 501 502 DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard). 503 To align to a different start day, we shift the date before truncating. 504 505 Shift formula: Sunday (7) gets +1, others get (1 - start_dow) 506 Examples: 507 Monday (1): shift = 0 (no shift needed) 508 Tuesday (2): shift = -1 (shift back 1 day) ... 
509 Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week) 510 """ 511 shift_days = 1 if start_dow == 7 else 1 - start_dow 512 513 # Shift date to align week boundaries with the desired start day 514 # No shift needed for Monday-based weeks (shift_days == 0) 515 shifted_date = ( 516 exp.DateAdd( 517 this=date_expr, 518 expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")), 519 ) 520 if shift_days != 0 521 else date_expr 522 ) 523 524 return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date) 525 526 527def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 528 unit = expression.unit 529 530 if _is_nanosecond_unit(unit): 531 return _handle_nanosecond_diff(self, expression.this, expression.expression) 532 533 this = _implicit_datetime_cast(expression.this) 534 expr = _implicit_datetime_cast(expression.expression) 535 536 # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7: 537 # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed) 538 # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference) 539 # Whereas for other units such as MONTH it does respect month boundaries: 540 # SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed) 541 date_part_boundary = expression.args.get("date_part_boundary") 542 543 # Extract week start day; returns None if day is dynamic (column/placeholder) 544 week_start = _week_unit_to_dow(unit) 545 if date_part_boundary and week_start and this and expr: 546 expression.set("unit", exp.Literal.string("WEEK")) 547 548 # Truncate both dates to week boundaries to respect input dialect semantics 549 this = _build_week_trunc_expression(this, week_start) 550 expr = _build_week_trunc_expression(expr, week_start) 551 552 return self.func("DATE_DIFF", 
unit_to_str(expression), expr, this) 553 554 555def _generate_datetime_array_sql( 556 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 557) -> str: 558 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 559 560 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 561 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 562 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 563 564 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 565 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 566 start=start, end=end, step=expression.args.get("step") 567 ) 568 569 if is_generate_date_array: 570 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 571 # GENERATE_DATE_ARRAY we must cast it back to DATE array 572 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 573 574 return self.sql(gen_series) 575 576 577def _json_extract_value_array_sql( 578 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 579) -> str: 580 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 581 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 582 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 583 584 585def _cast_to_varchar(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 586 if arg and arg.type and not arg.is_type(exp.DataType.Type.VARCHAR, exp.DataType.Type.UNKNOWN): 587 return exp.cast(arg, exp.DataType.Type.VARCHAR) 588 return arg 589 590 591def _cast_to_boolean(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 592 if arg and not arg.is_type(exp.DataType.Type.BOOLEAN): 593 return exp.cast(arg, exp.DataType.Type.BOOLEAN) 594 return arg 595 596 597def _is_binary(arg: exp.Expression) -> 
bool: 598 return arg.is_type( 599 exp.DataType.Type.BINARY, 600 exp.DataType.Type.VARBINARY, 601 exp.DataType.Type.BLOB, 602 ) 603 604 605def _gen_with_cast_to_blob( 606 self: DuckDB.Generator, expression: exp.Expression, result_sql: str 607) -> str: 608 if _is_binary(expression): 609 blob = exp.DataType.build("BLOB", dialect="duckdb") 610 result_sql = self.sql(exp.Cast(this=result_sql, to=blob)) 611 return result_sql 612 613 614def _cast_to_bit(arg: exp.Expression) -> exp.Expression: 615 if not _is_binary(arg): 616 return arg 617 618 if isinstance(arg, exp.HexString): 619 arg = exp.Unhex(this=exp.Literal.string(arg.this)) 620 621 return exp.cast(arg, exp.DataType.Type.BIT) 622 623 624def _prepare_binary_bitwise_args(expression: exp.Binary) -> None: 625 if _is_binary(expression.this): 626 expression.set("this", _cast_to_bit(expression.this)) 627 if _is_binary(expression.expression): 628 expression.set("expression", _cast_to_bit(expression.expression)) 629 630 631def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str: 632 # Transform ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL 633 having = expression.this 634 if isinstance(having, exp.HavingMax): 635 func_name = "ARG_MAX_NULL" if having.args.get("max") else "ARG_MIN_NULL" 636 return self.func(func_name, having.this, having.expression) 637 return self.function_fallback_sql(expression) 638 639 640def _bitwise_agg_sql( 641 self: DuckDB.Generator, 642 expression: t.Union[exp.BitwiseOrAgg, exp.BitwiseAndAgg, exp.BitwiseXorAgg], 643) -> str: 644 """ 645 DuckDB's bitwise aggregate functions only accept integer types. 
For other types: 646 - DECIMAL/STRING: Use CAST(arg AS INT) to convert directly, will round to nearest int 647 - FLOAT/DOUBLE: Use ROUND(arg)::INT to round to nearest integer, required due to float precision loss 648 """ 649 if isinstance(expression, exp.BitwiseOrAgg): 650 func_name = "BIT_OR" 651 elif isinstance(expression, exp.BitwiseAndAgg): 652 func_name = "BIT_AND" 653 else: # exp.BitwiseXorAgg 654 func_name = "BIT_XOR" 655 656 arg = expression.this 657 658 if not arg.type: 659 from sqlglot.optimizer.annotate_types import annotate_types 660 661 arg = annotate_types(arg, dialect=self.dialect) 662 663 if arg.is_type(*exp.DataType.REAL_TYPES, *exp.DataType.TEXT_TYPES): 664 if arg.is_type(*exp.DataType.FLOAT_TYPES): 665 # float types need to be rounded first due to precision loss 666 arg = exp.func("ROUND", arg) 667 668 arg = exp.cast(arg, exp.DataType.Type.INT) 669 670 return self.func(func_name, arg) 671 672 673def _literal_sql_with_ws_chr(self: DuckDB.Generator, literal: str) -> str: 674 # DuckDB does not support \uXXXX escapes, so we must use CHR() instead of replacing them directly 675 if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal): 676 return self.sql(exp.Literal.string(literal)) 677 678 sql_segments: t.List[str] = [] 679 for is_ws_control, group in groupby(literal, key=lambda ch: ch in WS_CONTROL_CHARS_TO_DUCK): 680 if is_ws_control: 681 for ch in group: 682 duckdb_char_code = WS_CONTROL_CHARS_TO_DUCK[ch] 683 sql_segments.append(self.func("CHR", exp.Literal.number(str(duckdb_char_code)))) 684 else: 685 sql_segments.append(self.sql(exp.Literal.string("".join(group)))) 686 687 sql = " || ".join(sql_segments) 688 return sql if len(sql_segments) == 1 else f"({sql})" 689 690 691def _escape_regex_metachars( 692 self: DuckDB.Generator, delimiters: t.Optional[exp.Expression], delimiters_sql: str 693) -> str: 694 r""" 695 Escapes regex metacharacters \ - ^ [ ] for use in character classes regex expressions. 
696 697 Literal strings are escaped at transpile time, expressions handled with REPLACE() calls. 698 """ 699 if not delimiters: 700 return delimiters_sql 701 702 if delimiters.is_string: 703 literal_value = delimiters.this 704 escaped_literal = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in literal_value) 705 return _literal_sql_with_ws_chr(self, escaped_literal) 706 707 escaped_sql = delimiters_sql 708 for raw, escaped in REGEX_ESCAPE_REPLACEMENTS.items(): 709 escaped_sql = self.func( 710 "REPLACE", 711 escaped_sql, 712 self.sql(exp.Literal.string(raw)), 713 self.sql(exp.Literal.string(escaped)), 714 ) 715 716 return escaped_sql 717 718 719def _build_capitalization_sql( 720 self: DuckDB.Generator, 721 value_to_split: str, 722 delimiters_sql: str, 723) -> str: 724 # empty string delimiter --> treat value as one word, no need to split 725 if delimiters_sql == "''": 726 return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))" 727 728 delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')" 729 split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')" 730 731 # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments. 732 # We do not know whether the first segment is a delimiter or not, so we check the first character of the string 733 # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd. 
734 return self.func( 735 "ARRAY_TO_STRING", 736 exp.case() 737 .when( 738 f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {delim_regex_sql})", 739 self.func( 740 "LIST_TRANSFORM", 741 self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql), 742 "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END", 743 ), 744 ) 745 .else_( 746 self.func( 747 "LIST_TRANSFORM", 748 self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql), 749 "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END", 750 ), 751 ), 752 "''", 753 ) 754 755 756def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str: 757 this_sql = self.sql(expression, "this") 758 delimiters = expression.args.get("expression") 759 if delimiters is None: 760 # fallback for manually created exp.Initcap w/o delimiters arg 761 delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS) 762 delimiters_sql = self.sql(delimiters) 763 764 escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql) 765 766 return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql) 767 768 769def _boolxor_agg_sql(self: DuckDB.Generator, expression: exp.BoolxorAgg) -> str: 770 """ 771 Snowflake's `BOOLXOR_AGG(col)` returns TRUE if exactly one input in `col` is TRUE, FALSE otherwise; 772 Since DuckDB does not have a mapping function, we mimic the behavior by generating `COUNT_IF(col) = 1`. 773 774 DuckDB's COUNT_IF strictly requires boolean inputs, so cast if not already boolean. 775 """ 776 return self.sql( 777 exp.EQ( 778 this=exp.CountIf(this=_cast_to_boolean(expression.this)), 779 expression=exp.Literal.number(1), 780 ) 781 ) 782 783 784def _bitshift_sql( 785 self: DuckDB.Generator, expression: exp.BitwiseLeftShift | exp.BitwiseRightShift 786) -> str: 787 """ 788 Transform bitshift expressions for DuckDB by injecting BIT/INT128 casts. 
789 790 DuckDB's bitwise shift operators don't work with BLOB/BINARY types, so we cast 791 them to BIT for the operation, then cast the result back to the original type. 792 793 Note: Assumes type annotation has been applied with the source dialect. 794 """ 795 operator = "<<" if isinstance(expression, exp.BitwiseLeftShift) else ">>" 796 result_is_blob = False 797 this = expression.this 798 799 if _is_binary(this): 800 result_is_blob = True 801 expression.set("this", exp.cast(this, exp.DataType.Type.BIT)) 802 elif expression.args.get("requires_int128"): 803 this.replace(exp.cast(this, exp.DataType.Type.INT128)) 804 805 result_sql = self.binary(expression, operator) 806 807 # Wrap in parentheses if parent is a bitwise operator to "fix" DuckDB precedence issue 808 # DuckDB parses: a << b | c << d as (a << b | c) << d 809 if isinstance(expression.parent, exp.Binary): 810 result_sql = self.sql(exp.Paren(this=result_sql)) 811 812 if result_is_blob: 813 result_sql = self.sql( 814 exp.Cast(this=result_sql, to=exp.DataType.build("BLOB", dialect="duckdb")) 815 ) 816 817 return result_sql 818 819 820def _scale_rounding_sql( 821 self: DuckDB.Generator, 822 expression: exp.Expression, 823 rounding_func: type[exp.Expression], 824) -> str | None: 825 """ 826 Handle scale parameter transformation for rounding functions. 
827 828 DuckDB doesn't support the scale parameter for certain functions (e.g., FLOOR, CEIL), 829 so we transform: FUNC(x, n) to ROUND(FUNC(x * 10^n) / 10^n, n) 830 831 Args: 832 self: The DuckDB generator instance 833 expression: The expression to transform (must have 'this', 'decimals', and 'to' args) 834 rounding_func: The rounding function class to use in the transformation 835 836 Returns: 837 The transformed SQL string if decimals parameter exists, None otherwise 838 """ 839 decimals = expression.args.get("decimals") 840 841 if decimals is None or expression.args.get("to") is not None: 842 return None 843 844 this = expression.this 845 if isinstance(this, exp.Binary): 846 this = exp.Paren(this=this) 847 848 n_int = decimals 849 if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)): 850 n_int = exp.cast(decimals, exp.DataType.Type.INT) 851 852 pow_ = exp.Pow(this=exp.Literal.number("10"), expression=n_int) 853 rounded = rounding_func(this=exp.Mul(this=this, expression=pow_)) 854 result = exp.Div(this=rounded, expression=pow_.copy()) 855 856 return self.round_sql( 857 exp.Round(this=result, decimals=decimals, casts_non_integer_decimals=True) 858 ) 859 860 861def _ceil_floor(self: DuckDB.Generator, expression: exp.Floor | exp.Ceil) -> str: 862 scaled_sql = _scale_rounding_sql(self, expression, type(expression)) 863 if scaled_sql is not None: 864 return scaled_sql 865 return self.ceil_floor(expression) 866 867 868def _regr_val_sql( 869 self: DuckDB.Generator, 870 expression: exp.RegrValx | exp.RegrValy, 871) -> str: 872 """ 873 Transpile Snowflake's REGR_VALX/REGR_VALY to DuckDB equivalent. 874 875 REGR_VALX(y, x) returns NULL if y is NULL; otherwise returns x. 876 REGR_VALY(y, x) returns NULL if x is NULL; otherwise returns y. 
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    y = expression.this
    x = expression.expression

    # Determine which argument to check for NULL and which to return based on expression type
    if isinstance(expression, exp.RegrValx):
        # REGR_VALX: check y for NULL, return x
        check_for_null = y
        return_value = x
        return_value_attr = "expression"
    else:
        # REGR_VALY: check x for NULL, return y
        check_for_null = x
        return_value = y
        return_value_attr = "this"

    # Get the type from the return argument
    result_type = return_value.type

    # If no type info, annotate the expression to infer types
    if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
        try:
            annotated = annotate_types(expression.copy(), dialect=self.dialect)
            result_type = getattr(annotated, return_value_attr).type
        except Exception:
            # Best-effort: if annotation fails we fall through to the DOUBLE default below
            pass

    # Default to DOUBLE for regression functions if type still unknown
    if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
        result_type = exp.DataType.build("DOUBLE")

    # Cast NULL to the same type as return_value to avoid DuckDB type inference issues
    typed_null = exp.Cast(this=exp.Null(), to=result_type)

    return self.sql(
        exp.If(
            this=exp.Is(this=check_for_null.copy(), expression=exp.Null()),
            true=typed_null,
            false=return_value.copy(),
        )
    )


def _maybe_corr_null_to_false(
    expression: t.Union[exp.Filter, exp.Window, exp.Corr],
) -> t.Optional[t.Union[exp.Filter, exp.Window, exp.Corr]]:
    """
    Unwrap FILTER/OVER wrappers looking for a CORR node and, if its
    `null_on_zero_variance` flag is set, clear it in place.

    Returns the (mutated) outermost expression, or None when there is no CORR
    underneath or the flag isn't set.
    """
    corr = expression
    while isinstance(corr, (exp.Window, exp.Filter)):
        corr = corr.this

    if not isinstance(corr, exp.Corr) or not corr.args.get("null_on_zero_variance"):
        return None

    corr.set("null_on_zero_variance", False)
    return expression


def _date_from_parts_sql(self: DuckDB.Generator, expression: exp.DateFromParts) -> str:
    """
    Snowflake's DATE_FROM_PARTS allows out-of-range values for the month and day input.
    E.g., larger values (month=13, day=100), zero-values (month=0, day=0), negative values (month=-13, day=-100).

    DuckDB's MAKE_DATE does not support out-of-range values, but DuckDB's INTERVAL type does.

    We convert to date arithmetic:
    DATE_FROM_PARTS(year, month, day)
    - MAKE_DATE(year, 1, 1) + INTERVAL (month-1) MONTH + INTERVAL (day-1) DAY
    """
    year_expr = expression.args.get("year")
    month_expr = expression.args.get("month")
    day_expr = expression.args.get("day")

    if expression.args.get("allow_overflow"):
        # Anchor at January 1st of the given year, then add month/day offsets as intervals
        base_date: exp.Expression = exp.func(
            "MAKE_DATE", year_expr, exp.Literal.number(1), exp.Literal.number(1)
        )

        if month_expr:
            base_date = base_date + exp.Interval(this=month_expr - 1, unit=exp.var("MONTH"))

        if day_expr:
            base_date = base_date + exp.Interval(this=day_expr - 1, unit=exp.var("DAY"))

        return self.sql(exp.cast(expression=base_date, to=exp.DataType.Type.DATE))

    return self.func("MAKE_DATE", year_expr, month_expr, day_expr)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }

    # Remove the inherited WEEKDAY mapping, which doesn't apply to this dialect
    DATE_PART_MAPPING.pop("WEEKDAY")

    INVERSE_TIME_MAPPING = {
        "%e": "%-d",  # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
    }

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "FORCE": TokenType.FORCE,
            "INSTALL": TokenType.INSTALL,
            "INT8": TokenType.BIGINT,
            "LOGICAL": TokenType.BOOLEAN,
            "MACRO": TokenType.FUNCTION,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "ROW": TokenType.STRUCT,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        # Drop the hint-comment token inherited from the base tokenizer
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        # SHOW is parsed as a proper statement (see STATEMENT_PARSERS), not a raw command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        # `^` is exponentiation in DuckDB (see EXPONENT below), so remove it from BITWISE
        BITWISE = parser.Parser.BITWISE.copy()
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
            TokenType.TILDA: binary_range_parser(exp.RegexpFullMatch),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
            "ARRAY_PREPEND": _build_array_prepend,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            # DECODE/ENCODE here are DuckDB's charset (de)coders, fixed to utf-8
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            # DuckDB's GET_BIT counts bit 0 from the most significant bit (zero_is_msb=True)
            "GET_BIT": lambda args: exp.Getbit(
                this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_APPEND": exp.ArrayAppend.from_arg_list,
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
            "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "LIST_FILTER": exp.ArrayFilter.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_PREPEND": _build_array_prepend,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_TRANSFORM": exp.Transform.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_DATE": exp.DateFromParts.from_arg_list,
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            # RANGE excludes its upper bound, unlike GENERATE_SERIES
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            # single_replace: without the 'g' modifier, only the first match is replaced
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
                single_replace=True,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        # Don't special-case these; let them be parsed as plain functions
        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        # DECODE is a charset decode here (see FUNCTIONS), not the CASE-like DECODE
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            # Prefix `@expr` is DuckDB shorthand for ABS(expr)
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            # `$1` / `$name` style placeholders
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.FORCE: lambda self: self._parse_force(),
            TokenType.INSTALL: lambda self: self._parse_install(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            # Support DuckDB's Python-style `lambda x: ...` syntax in addition to `x -> ...`
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                # Not actually a lambda expression — rewind and let other parsers handle it
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            # NOTE(review): `consume_pipe` is accepted but not forwarded to the super call —
            # confirm this is intentional
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                # Make DuckDB's implicit default sampling methods explicit in the AST
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < (1, 2) and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            # Brace syntax `MAP {k: v}` produces ToMap; `MAP([keys], [values])` produces Map
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach: bool = True) -> exp.Attach | exp.Detach:
            # Options are only meaningful for ATTACH, e.g. ATTACH 'db' (READ_ONLY)
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            # Optional DATABASE keyword: ATTACH [DATABASE] ...
            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

        def _parse_force(self) -> exp.Install | exp.Command:
            # FORCE can only be followed by INSTALL or CHECKPOINT
            # In the case of CHECKPOINT, we fallback
            if not self._match(TokenType.INSTALL):
                return self._parse_as_command(self._prev)

            return self._parse_install(force=True)

        def _parse_install(self, force: bool = False) -> exp.Install:
            return self.expression(
                exp.Install,
                this=self._parse_id_var(),
                from_=self._parse_var_or_string() if self._match(TokenType.FROM) else None,
                force=force,
            )

        def _parse_primary(self) -> t.Optional[exp.Expression]:
            # `#1`, `#2`, ... reference columns by position
            if self._match_pair(TokenType.HASH, TokenType.NUMBER):
                return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

            return super()._parse_primary()

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True
        SUPPORTS_LIKE_QUANTIFIERS = False
        SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: _anyvalue_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Boolnot: lambda self, e: f"NOT ({self.sql(e, 'this')})",
            exp.Array: transforms.preprocess(
                [transforms.inherit_struct_field_names],
                generator=inline_array_unless_query,
            ),
            exp.ArrayAppend: rename_func("LIST_APPEND"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            # LIST_PREPEND takes (element, list), i.e. the reverse of ArrayPrepend's args
            exp.ArrayPrepend: lambda self, e: self.func("LIST_PREPEND", e.expression, e.this),
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.ArrayUniqueAgg: lambda self, e: self.func(
                "LIST", exp.Distinct(expressions=[e.this])
            ),
            exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
            exp.BitwiseAndAgg: _bitwise_agg_sql,
            exp.BitwiseLeftShift: _bitshift_sql,
            exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
            exp.BitwiseOrAgg: _bitwise_agg_sql,
            exp.BitwiseRightShift: _bitshift_sql,
            exp.BitwiseXorAgg: _bitwise_agg_sql,
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.Corr: lambda self, e: self._corr_sql(e),
            exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            # SYSDATE-flavored current timestamps are rendered in UTC
            exp.CurrentTimestamp: lambda self, e: self.sql(
                exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
            )
            if e.args.get("sysdate")
            else "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Dayname: lambda self, e: (
                self.func("STRFTIME", e.this, exp.Literal.string("%a"))
                if e.args.get("abbreviated")
                else self.func("DAYNAME", e.this)
            ),
            exp.Monthname: lambda self, e: (
                self.func("STRFTIME", e.this, exp.Literal.string("%b"))
                if e.args.get("abbreviated")
                else self.func("MONTHNAME", e.this)
            ),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_to_binary_interval_op(),
            exp.DateFromParts: _date_from_parts_sql,
            exp.DateSub: _date_delta_to_binary_interval_op(),
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeDiff: _date_diff_sql,
            exp.DatetimeSub: _date_delta_to_binary_interval_op(),
            exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Getbit: getbit_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.Ceil: _ceil_floor,
            exp.Floor: _ceil_floor,
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)),
            exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)),
            exp.BoolxorAgg: _boolxor_agg_sql,
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.Initcap: _initcap_sql,
            # Digest variants return raw bytes, so UNHEX the hex digest strings
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)),
            exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)),
            exp.MonthsBetween: months_between_sql,
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                regexp_replace_global_modifier(e),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.RegrValx: _regr_val_sql,
            exp.RegrValy: _regr_val_sql,
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_to_binary_interval_op(),
            exp.TimeSub: _date_delta_to_binary_interval_op(),
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_to_binary_interval_op(),
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampSub: _date_delta_to_binary_interval_op(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.ToBoolean: _to_boolean_sql,
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
            exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
            exp.UnixSeconds: lambda self, e: self.sql(
                exp.cast(
                    self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DataType.Type.BIGINT
                )
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.YearOfWeek: lambda self, e: self.sql(
                exp.Extract(
                    this=exp.Var(this="ISOYEAR"),
                    expression=e.this,
                )
            ),
            exp.YearOfWeekIso: lambda self, e: self.sql(
                exp.Extract(
                    this=exp.Var(this="ISOYEAR"),
                    expression=e.this,
                )
            ),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
            exp.LastDay: _last_day_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DECFLOAT: "DECIMAL(38, 5)",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMPTZ",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
            exp.DataType.Type.BIGDECIMAL: "DECIMAL(38, 5)",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        # The `_p` suffixes come from the libpg_query keyword list linked above
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        # Template for ZIPF transpilation - placeholders get replaced with actual parameters
        ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse(
            """
            WITH rand AS (SELECT :random_expr AS r),
            weights AS (
                SELECT i, 1.0 / POWER(i, :s) AS w
                FROM RANGE(1, :n + 1) AS t(i)
            ),
            cdf AS (
                SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
                FROM weights
            )
            SELECT MIN(i)
            FROM cdf
            WHERE p >= (SELECT r FROM rand)
            """
        )

        # Template for NORMAL transpilation using Box-Muller transform
        # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
        NORMAL_TEMPLATE: exp.Expression = exp.maybe_parse(
            ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
        )

        # Template for generating a seeded pseudo-random value in [0, 1) from a hash
        SEEDED_RANDOM_TEMPLATE: exp.Expression = exp.maybe_parse(
            "(ABS(HASH(:seed)) % 1000000) / 1000000.0"
        )

        # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
        RANDSTR_TEMPLATE: exp.Expression = exp.maybe_parse(
            f"""
            SELECT LISTAGG(
                SUBSTRING(
                    '{RANDSTR_CHAR_POOL}',
                    1 + CAST(FLOOR(random_value * 62) AS INT),
                    1
                ),
                ''
            )
            FROM (
                SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
                FROM RANGE(:length) AS t(i)
            )
            """,
        )

        def bitmapbucketnumber_sql(
            self: DuckDB.Generator, expression: exp.BitmapBucketNumber
        ) -> str:
            """
            Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

            Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
            - Each bucket covers 32,768 values
            - Bucket numbering starts at 1
            - Formula: ((value - 1) // 32768) + 1 for positive values

            For non-positive values (0 and negative), we use value // 32768 to avoid
            producing bucket 0 or positive bucket IDs for negative inputs.
            """
            value = expression.this

            positive_formula = ((value - 1) // 32768) + 1
            non_positive_formula = value // 32768

            # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
            case_expr = (
                exp.case()
                .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
                .else_(non_positive_formula)
            )
            return self.sql(case_expr)

        def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
            """
            Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

            Snowflake's BITMAP_BIT_POSITION behavior:
            - For n <= 0: returns ABS(n) % 32768
            - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
            """
            this = expression.this

            # Renders (IF(n > 0, n - 1, ABS(n))) % MAX_BIT_POSITION, where MAX_BIT_POSITION is a
            # module-level constant (32768, per the docstring above)
            return self.sql(
                exp.Mod(
                    this=exp.Paren(
                        this=exp.If(
                            this=exp.GT(this=this, expression=exp.Literal.number(0)),
                            true=this - exp.Literal.number(1),
                            false=exp.Abs(this=this),
                        )
                    ),
                    expression=MAX_BIT_POSITION,
                )
            )

        def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
            """
            Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
            Uses a pre-parsed template with placeholders replaced by expression nodes.

            RANDSTR(length, generator) generates a random string of specified length.
1819 - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result) 1820 - With RANDOM(): Use RANDOM() in the hash for non-deterministic output 1821 - No generator: Use default seed value 1822 """ 1823 length = expression.this 1824 generator = expression.args.get("generator") 1825 1826 if generator: 1827 if isinstance(generator, exp.Rand): 1828 # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself 1829 seed_value = generator.this or generator 1830 else: 1831 # Const/int or other expression - use as seed directly 1832 seed_value = generator 1833 else: 1834 # No generator specified, use default seed (arbitrary but deterministic) 1835 seed_value = exp.Literal.number(RANDSTR_SEED) 1836 1837 replacements = {"seed": seed_value, "length": length} 1838 return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})" 1839 1840 def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str: 1841 """ 1842 Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling. 1843 Uses a pre-parsed template with placeholders replaced by expression nodes. 
        """
        s = expression.this
        n = expression.args["elementcount"]
        gen = expression.args["gen"]

        if not isinstance(gen, exp.Rand):
            # Deterministic seeded uniform: (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expression = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            # Use RANDOM() for non-deterministic output
            random_expr = exp.Rand()

        replacements = {"s": s, "n": n, "random_expr": random_expr}
        # Parenthesized because ZIPF_TEMPLATE expands to a scalar subquery.
        return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"

    def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
        """
        TO_BINARY and TRY_TO_BINARY transpilation:
        - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
        - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
        - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')

        For TRY_TO_BINARY (safe=True), wrap with TRY():
        - 'HEX': TRY_TO_BINARY('invalid', 'HEX') → TRY(UNHEX('invalid'))
        """
        value = expression.this
        format_arg = expression.args.get("format")
        is_safe = expression.args.get("safe")

        # HEX is the default format when no explicit format argument is given.
        fmt = "HEX"
        if format_arg:
            fmt = format_arg.name.upper()

        if expression.is_type(exp.DataType.Type.BINARY):
            if fmt == "UTF-8":
                result = self.func("ENCODE", value)
            elif fmt == "BASE64":
                result = self.func("FROM_BASE64", value)
            elif fmt == "HEX":
                result = self.func("UNHEX", value)
            else:
                # Unknown format: TRY_TO_BINARY yields NULL, TO_BINARY warns and falls back.
                if is_safe:
                    return self.sql(exp.null())
                else:
                    self.unsupported(f"format {fmt} is not supported")
                    result = self.func("TO_BINARY", value)

            # Wrap with TRY() for TRY_TO_BINARY
            if is_safe:
                result = self.func("TRY", result)

            return result

        # Fallback, which needs to be updated if want to support transpilation from other dialects than Snowflake
        return self.func("TO_BINARY", value)

    def _greatest_least_sql(
        self: DuckDB.Generator, expression: exp.Greatest | exp.Least
    ) -> str:
        """
        Handle GREATEST/LEAST functions with dialect-aware NULL behavior.

        - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
        - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
        """
        # Get all arguments
        all_args = [expression.this, *expression.expressions]
        fallback_sql = self.function_fallback_sql(expression)

        if expression.args.get("ignore_nulls"):
            # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
            return self.sql(fallback_sql)

        # return NULL if any argument is NULL
        case_expr = exp.case().when(
            exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
            exp.null(),
            copy=False,
        )
        # fallback_sql is a plain SQL string; the generator emits it as the ELSE branch.
        case_expr.set("default", fallback_sql)
        return self.sql(case_expr)

    def greatest_sql(self: DuckDB.Generator, expression: exp.Greatest) -> str:
        # Delegates to the shared GREATEST/LEAST NULL-handling logic above.
        return self._greatest_least_sql(expression)

    def least_sql(self: DuckDB.Generator, expression: exp.Least) -> str:
        # Delegates to the shared GREATEST/LEAST NULL-handling logic above.
        return self._greatest_least_sql(expression)

    def lambda_sql(
        self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
    ) -> str:
        """Render a lambda, using DuckDB's `LAMBDA x: ...` form when the `colon` arg is set."""
        if expression.args.get("colon"):
            prefix = "LAMBDA "
            arrow_sep = ":"
            wrap = False
        else:
            prefix = ""

        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
        return f"{prefix}{lambda_sql}"

    def show_sql(self, expression: exp.Show) -> str:
        # Only the command name is rendered, e.g. SHOW TABLES.
        return f"SHOW {expression.name}"

    def install_sql(self, expression: exp.Install) -> str:
        """Render DuckDB's [FORCE] INSTALL <extension> [FROM <repo>] statement."""
        force = "FORCE " if expression.args.get("force") else ""
        this = self.sql(expression, "this")
        from_clause = expression.args.get("from_")
        from_clause = f" FROM {from_clause}" if from_clause else ""
        return f"{force}INSTALL {this}{from_clause}"

    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
        # No DuckDB equivalent with a compatible return type; warn and emit as-is.
        self.unsupported(
            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
        )
        return self.function_fallback_sql(expression)

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        # ISO-8601 strings cast cleanly to TIMESTAMPTZ in DuckDB.
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        """Render STR_TO_TIME via STRPTIME/TRY_STRPTIME, casting to TIMESTAMPTZ for LTZ/TZ targets."""
        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DataType.Type.TIMESTAMPLTZ,
            exp.DataType.Type.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            # Safe variant: TRY_STRPTIME returns NULL instead of raising on bad input.
            formatted_time = self.format_time(expression)
            cast_type = (
                exp.DataType.Type.TIMESTAMPTZ if needs_tz else exp.DataType.Type.TIMESTAMP
            )
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ),
                )
            )
        return base_sql

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        """Render STR_TO_DATE as CAST(STRPTIME(...) AS DATE), using TRY_STRPTIME when safe."""
        formatted_time = self.format_time(expression)
        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
        return self.sql(
            exp.cast(
                self.func(function_name, expression.this, formatted_time),
                exp.DataType(this=exp.DataType.Type.DATE),
            )
        )

    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
        """Render TS_OR_DS_TO_TIME: STRPTIME+cast when a format is given, otherwise a plain cast to TIME."""
        this = expression.this
        time_format = self.format_time(expression)
        safe = expression.args.get("safe")
        time_type = exp.DataType.build("TIME", dialect="duckdb")
        # Safe variant uses TRY_CAST so failures yield NULL instead of an error.
        cast_expr = exp.TryCast if safe else exp.Cast

        if time_format:
            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
            return self.sql(cast_expr(this=strptime, to=time_type))

        # Already a TIME (or a nested TS_OR_DS_TO_TIME): no extra cast needed.
        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DataType.Type.TIME):
            return self.sql(this)

        return self.sql(cast_expr(this=this, to=time_type))

    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        """Render CURRENT_DATE, honoring an optional timezone argument via AT TIME ZONE."""
        if not expression.this:
            return "CURRENT_DATE"

        # CURRENT_DATE(tz) -> CAST(CURRENT_TIMESTAMP AT TIME ZONE tz AS DATE)
        expr = exp.Cast(
            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
            to=exp.DataType(this=exp.DataType.Type.DATE),
        )
        return self.sql(expr)

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        """Render PARSE_JSON as JSON(...); the safe variant NULLs out invalid JSON via JSON_VALID."""
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def normal_sql(self, expression: exp.Normal) -> str:
        """
        Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

        Uses the Box-Muller transform via NORMAL_TEMPLATE.
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expression = expression.args["gen"]

        # Build two uniform random values [0, 1) for Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            # Unseeded RANDOM(): two independent non-deterministic samples.
            u1: exp.Expression = exp.Rand()
            u2: exp.Expression = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            # Second sample uses seed + 1 so u1 and u2 are decorrelated but still deterministic.
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))

    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        gen = expression.args.get("gen")

        # Determine if result should be integer (both bounds are integers).
        # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expression = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        range_expr: exp.Expression = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            # +1 widens the range so FLOOR below can reach max inclusively.
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expression = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        if is_int_result:
            # Integer bounds: FLOOR and cast so the result type matches Snowflake's INT output.
            result = exp.Cast(
                this=exp.Floor(this=result),
                to=exp.DataType.build("BIGINT"),
            )

        return self.sql(result)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """Render TIME_FROM_PARTS as MAKE_TIME, folding nanoseconds into the seconds argument."""
        nano = expression.args.get("nano")
        if nano is not None:
            # MAKE_TIME has no nano argument: fold it into fractional seconds.
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """Render TIMESTAMP_FROM_PARTS as MAKE_TIMESTAMP, folding milli/nano into fractional seconds."""
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        """Render table sampling, forcing RESERVOIR when a fixed row count is requested."""
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        # UDF parameters are rendered as bare names (no type), unlike table column defs.
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        """Render joins, normalizing condition-less INNER/OUTER joins for DuckDB."""
        if (
            not expression.args.get("using")
            and not expression.args.get("on")
            and not expression.method
            and (expression.kind in ("", "INNER", "OUTER"))
        ):
            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            if isinstance(expression.this, exp.Unnest):
                return super().join_sql(expression.on(exp.true()))

            # Otherwise drop side/kind so the join renders as a plain cross-style join.
            expression.set("side", None)
            expression.set("kind", None)

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def countif_sql(self, expression: exp.CountIf) -> str:
        # COUNT_IF is only available natively from DuckDB 1.2 onwards.
        if self.dialect.version >= (1, 2):
            return self.function_fallback_sql(expression)

        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render bracket access, emulating pre-1.2 DuckDB MAP subscript semantics."""
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DataType.Type.MAP):
                # Pre-1.2, map[key] returned a single-element list: unwrap with [1].
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """Render WITHIN GROUP, rewriting ARRAY_AGG to carry its ORDER BY inline."""
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)
        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Splice the WITHIN GROUP clause inside the function's closing paren.
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        """Render LENGTH, resolving BLOB vs text arguments so byte lengths use OCTET_LENGTH."""
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # Type unknown at generation time: dispatch at runtime on TYPEOF(arg).
        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            .else_(
                exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
        )

        return self.sql(case)

    def lower_sql(self, expression: exp.Lower) -> str:
        # Cast to VARCHAR first so BLOB inputs work, then restore BLOB on the way out.
        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def upper_sql(self, expression: exp.Upper) -> str:
        # Mirrors lower_sql: VARCHAR in, optional BLOB cast out.
        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def replace_sql(self, expression: exp.Replace) -> str:
        """Render REPLACE with all three arguments normalized to VARCHAR."""
        result_sql = self.func(
            "REPLACE",
            _cast_to_varchar(expression.this),
            _cast_to_varchar(expression.expression),
            _cast_to_varchar(expression.args.get("replacement")),
        )
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
        # Shared helper for infix bitwise operators (&, |, etc.) over BIT-cast args.
        _prepare_binary_bitwise_args(expression)
        result_sql = self.binary(expression, op)
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
        # DuckDB spells bitwise XOR as the XOR() function, not an infix operator.
        _prepare_binary_bitwise_args(expression)
        result_sql = self.func("XOR", expression.this, expression.expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        """Render OBJECT_INSERT as STRUCT_INSERT, or STRUCT_PACK when the input struct is empty."""
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def startswith_sql(self, expression: exp.StartsWith) -> str:
        # Normalize both operands to VARCHAR so BLOB inputs are accepted.
        return self.func(
            "STARTS_WITH",
            _cast_to_varchar(expression.this),
            _cast_to_varchar(expression.expression),
        )

    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST, emulating BigQuery's nested-array explosion via max_depth => 2."""
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        """Render IGNORE NULLS where DuckDB supports it, approximating it elsewhere."""
        this = expression.this

        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        # FIRST(x IGNORE NULLS) behaves like ANY_VALUE in DuckDB, which skips NULLs.
        if isinstance(this, exp.First):
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        return self.sql(this)

    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
        """Render RESPECT NULLS for window functions; warn and strip it elsewhere."""
        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render RESPECT NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
            return super().respectnulls_sql(expression)

        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        """Render ARRAY_TO_STRING, substituting a replacement value for NULL elements if given."""
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        if null_text:
            # COALESCE each element with the null-replacement text before joining.
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        """Render REGEXP_EXTRACT, supporting position offsets and occurrence selection."""
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # A start position > 1 is emulated by slicing the input with SUBSTRING.
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            # SUBSTRING past the end yields ''; NULLIF maps that to NULL when requested.
            if null_if_pos_overflow:
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        # Do not render group if there is no following argument,
        # and it's the default value for this dialect
        if (
            not params
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        # Occurrence > 1: extract all matches, then pick the nth one.
        if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
            # NOTE(review): occurrence is already an expression node here — wrapping it
            # in exp.Literal.number looks suspicious; verify against the parser's output.
            return self.func(
                "ARRAY_EXTRACT",
                self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
                exp.Literal.number(occurrence),
            )

        return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)

    @unsupported_args("culture")
    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
        """Render TO_CHAR-style formatting via DuckDB's FORMAT for integer precisions only."""
        fmt = expression.args.get("format")
        if fmt and fmt.is_int:
            # e.g. fmt=2 -> FORMAT('{:,.2f}', x): thousands separators, 2 decimals.
            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)

    def autoincrementcolumnconstraint_sql(self, _) -> str:
        # DuckDB has no AUTOINCREMENT; warn and emit nothing for the constraint.
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""

    def aliases_sql(self, expression: exp.Aliases) -> str:
        # POSEXPLODE consumes its own aliases, so route it to the dedicated handler.
        this = expression.this
        if isinstance(this, exp.Posexplode):
            return self.posexplode_sql(this)

        return super().aliases_sql(expression)

    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        """Render Spark's POSEXPLODE as GENERATE_SUBSCRIPTS(...) - 1 plus UNNEST."""
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql

    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        # Text inputs are first coerced to TIMESTAMP so date arithmetic is valid.
        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DataType.Type.FLOAT,
                exp.DataType.Type.DOUBLE,
                exp.DataType.Type.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)

    def format_sql(self, expression: exp.Format) -> str:
        # Only the trivial single-'%s' case maps cleanly onto DuckDB's FORMAT('{}', arg).
        if expression.name.lower() == "%s" and len(expression.expressions) == 1:
            return self.func("FORMAT", "'{}'", expression.expressions[0])

        return self.function_fallback_sql(expression)

    def hexstring_sql(
        self, expression: exp.HexString, binary_function_repr: t.Optional[str] = None
    ) -> str:
        # UNHEX('FF') correctly produces blob \xFF in DuckDB
        return super().hexstring_sql(expression, binary_function_repr="UNHEX")

    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        """Render DATE_TRUNC, casting the result back to the input's temporal type when required."""
        unit = unit_to_str(expression)
        date = expression.this
        result = self.func("DATE_TRUNC", unit, date)

        if expression.args.get("input_type_preserved"):
            if not date.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                date = annotate_types(date, dialect=self.dialect)

            # DATE_TRUNC widens to TIMESTAMP; cast back to keep the source dialect's type.
            if date.type and date.is_type(*exp.DataType.TEMPORAL_TYPES):
                return self.sql(exp.Cast(this=result, to=date.type))
        return result

    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        """Render TIMESTAMP_TRUNC, handling timezone-aware truncation and TIME inputs."""
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this

        if is_date_unit(unit) and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            if not timestamp.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                timestamp = annotate_types(timestamp, dialect=self.dialect)

            if timestamp.type and timestamp.is_type(
                exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ
            ):
                # DATE_TRUNC doesn't accept TIME: anchor it to a dummy date, truncate,
                # then cast back to the original TIME type.
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DataType.Type.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            if timestamp.type and timestamp.is_type(*exp.DataType.TEMPORAL_TYPES):
                return self.sql(exp.Cast(this=result, to=timestamp.type))
        return result

    def trim_sql(self, expression: exp.Trim) -> str:
        """Render TRIM with operands normalized to VARCHAR (mutates the node in place)."""
        expression.this.replace(_cast_to_varchar(expression.this))
        if expression.expression:
            expression.expression.replace(_cast_to_varchar(expression.expression))

        result_sql = super().trim_sql(expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def round_sql(self, expression: exp.Round) -> str:
        """Render ROUND, mapping source-dialect rounding modes and non-integer scales."""
        this = expression.this
        decimals = expression.args.get("decimals")
        truncate = expression.args.get("truncate")

        # DuckDB requires the scale (decimals) argument to be an INT
        # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
        if decimals is not None and expression.args.get("casts_non_integer_decimals"):
            if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
                decimals = exp.cast(decimals, exp.DataType.Type.INT)

        func = "ROUND"
        if truncate:
            # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
            if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
                func = "ROUND_EVEN"
                truncate = None
            # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
            elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
                # This is DuckDB's default ROUND behavior, so just drop the mode argument.
                truncate = None

        return self.func(func, this, decimals, truncate)

    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
        """
        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
        dividing the input distribution into n equal-sized buckets.

        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
        does not document the specific algorithm used so results may differ. DuckDB does not
        support RESPECT NULLS.
2648 """ 2649 this = expression.this 2650 if isinstance(this, exp.Distinct): 2651 # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both 2652 if len(this.expressions) < 2: 2653 self.unsupported("APPROX_QUANTILES requires a bucket count argument") 2654 return self.function_fallback_sql(expression) 2655 num_quantiles_expr = this.expressions[1].pop() 2656 else: 2657 num_quantiles_expr = expression.expression 2658 2659 if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int: 2660 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 2661 return self.function_fallback_sql(expression) 2662 2663 num_quantiles = t.cast(int, num_quantiles_expr.to_py()) 2664 if num_quantiles <= 0: 2665 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 2666 return self.function_fallback_sql(expression) 2667 2668 quantiles = [ 2669 exp.Literal.number(Decimal(i) / Decimal(num_quantiles)) 2670 for i in range(num_quantiles + 1) 2671 ] 2672 2673 return self.sql( 2674 exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)) 2675 ) 2676 2677 def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str: 2678 if expression.args.get("scalar_only"): 2679 expression = exp.JSONExtractScalar( 2680 this=rename_func("JSON_VALUE")(self, expression), expression="'$'" 2681 ) 2682 return _arrow_json_extract_sql(self, expression) 2683 2684 def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str: 2685 this = expression.this 2686 2687 if _is_binary(this): 2688 expression.type = exp.DataType.build("BINARY") 2689 2690 arg = _cast_to_bit(this) 2691 2692 if isinstance(this, exp.Neg): 2693 arg = exp.Paren(this=arg) 2694 2695 expression.set("this", arg) 2696 2697 result_sql = f"~{self.sql(expression, 'this')}" 2698 2699 return _gen_with_cast_to_blob(self, expression, result_sql) 2700 2701 def window_sql(self, expression: exp.Window) -> str: 2702 this = expression.this 2703 if isinstance(this, exp.Corr) 
or ( 2704 isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr) 2705 ): 2706 return self._corr_sql(expression) 2707 2708 return super().window_sql(expression) 2709 2710 def filter_sql(self, expression: exp.Filter) -> str: 2711 if isinstance(expression.this, exp.Corr): 2712 return self._corr_sql(expression) 2713 2714 return super().filter_sql(expression) 2715 2716 def _corr_sql( 2717 self, 2718 expression: t.Union[exp.Filter, exp.Window, exp.Corr], 2719 ) -> str: 2720 if isinstance(expression, exp.Corr) and not expression.args.get( 2721 "null_on_zero_variance" 2722 ): 2723 return self.func("CORR", expression.this, expression.expression) 2724 2725 corr_expr = _maybe_corr_null_to_false(expression) 2726 if corr_expr is None: 2727 if isinstance(expression, exp.Window): 2728 return super().window_sql(expression) 2729 if isinstance(expression, exp.Filter): 2730 return super().filter_sql(expression) 2731 corr_expr = expression # make mypy happy 2732 2733 return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
class DuckDB(Dialect):
    """DuckDB dialect: tokenizer, parser and generator settings."""

    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }

    # DuckDB has no WEEKDAY date part, so it must not be accepted as one.
    DATE_PART_MAPPING.pop("WEEKDAY")

    INVERSE_TIME_MAPPING = {
        "%e": "%-d",  # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
    }

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Convert a literal into a JSON path, leaving DuckDB-specific pointer
        syntax untouched."""
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "FORCE": TokenType.FORCE,
            "INSTALL": TokenType.INSTALL,
            "INT8": TokenType.BIGINT,
            "LOGICAL": TokenType.BOOLEAN,
            "MACRO": TokenType.FUNCTION,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "ROW": TokenType.STRUCT,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        # DuckDB has no hint comment syntax, so `/*+` must tokenize as a plain comment.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        # SHOW is parsed as a statement (see Parser.STATEMENT_PARSERS), not a command.
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        # In DuckDB `^` is exponentiation, not bitwise XOR, so it is removed from
        # the bitwise operator table (it is re-registered under EXPONENT).
        BITWISE = parser.Parser.BITWISE.copy()
        BITWISE.pop(TokenType.CARET)
        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
            TokenType.TILDA: binary_range_parser(exp.RegexpFullMatch),
        }

        # Both `^` and `**` are exponentiation in DuckDB.
        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
            "ARRAY_PREPEND": _build_array_prepend,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            # DuckDB's DECODE/ENCODE convert between BLOB and UTF-8 TEXT (single arg),
            # unlike the multi-arg DECODE of other dialects (see FUNCTION_PARSERS.pop).
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "GET_BIT": lambda args: exp.Getbit(
                this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_APPEND": exp.ArrayAppend.from_arg_list,
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
            "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "LIST_FILTER": exp.ArrayFilter.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_PREPEND": _build_array_prepend,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_TRANSFORM": exp.Transform.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_DATE": exp.DateFromParts.from_arg_list,
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            # RANGE is like GENERATE_SERIES but excludes the end bound.
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            # DuckDB's REGEXP_REPLACE replaces only the first match unless the 'g'
            # modifier is supplied, hence single_replace=True.
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
                single_replace=True,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        # DuckDB has no DATE_SUB function and no GLOB function (GLOB is an operator).
        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        # DuckDB's DECODE is handled in FUNCTIONS above, not as a CASE-like DECODE.
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            # Prefix `@expr` is absolute value in DuckDB.
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        # SEMI/ANTI are join types in DuckDB and thus can't be table aliases.
        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            # `$1` / `$name` style placeholders.
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.FORCE: lambda self: self._parse_force(),
            TokenType.INSTALL: lambda self: self._parse_install(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            """Parse DuckDB's explicit `LAMBDA args: body` syntax, falling back to the
            default (arrow) lambda parser when the LAMBDA keyword is absent."""
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                # Not actually a lambda; rewind so LAMBDA can be parsed as an identifier.
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            # NOTE(review): consume_pipe is accepted but not forwarded to super() —
            # confirm this is intentional.
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                # Fill in DuckDB's default sampling method: RESERVOIR for a fixed-size
                # sample, SYSTEM for a percentage sample.
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < (1, 2) and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            """Parse MAP literals: `MAP {k: v}` becomes ToMap, `MAP([k...], [v...])`
            becomes Map."""
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))
1306 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 1307 return self._parse_field_def() 1308 1309 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 1310 if len(aggregations) == 1: 1311 return super()._pivot_column_names(aggregations) 1312 return pivot_column_names(aggregations, dialect="duckdb") 1313 1314 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 1315 def _parse_attach_option() -> exp.AttachOption: 1316 return self.expression( 1317 exp.AttachOption, 1318 this=self._parse_var(any_token=True), 1319 expression=self._parse_field(any_token=True), 1320 ) 1321 1322 self._match(TokenType.DATABASE) 1323 exists = self._parse_exists(not_=is_attach) 1324 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 1325 1326 if self._match(TokenType.L_PAREN, advance=False): 1327 expressions = self._parse_wrapped_csv(_parse_attach_option) 1328 else: 1329 expressions = None 1330 1331 return ( 1332 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 1333 if is_attach 1334 else self.expression(exp.Detach, this=this, exists=exists) 1335 ) 1336 1337 def _parse_show_duckdb(self, this: str) -> exp.Show: 1338 return self.expression(exp.Show, this=this) 1339 1340 def _parse_force(self) -> exp.Install | exp.Command: 1341 # FORCE can only be followed by INSTALL or CHECKPOINT 1342 # In the case of CHECKPOINT, we fallback 1343 if not self._match(TokenType.INSTALL): 1344 return self._parse_as_command(self._prev) 1345 1346 return self._parse_install(force=True) 1347 1348 def _parse_install(self, force: bool = False) -> exp.Install: 1349 return self.expression( 1350 exp.Install, 1351 this=self._parse_id_var(), 1352 from_=self._parse_var_or_string() if self._match(TokenType.FROM) else None, 1353 force=force, 1354 ) 1355 1356 def _parse_primary(self) -> t.Optional[exp.Expression]: 1357 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 1358 return 
exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 1359 1360 return super()._parse_primary() 1361 1362 class Generator(generator.Generator): 1363 PARAMETER_TOKEN = "$" 1364 NAMED_PLACEHOLDER_TOKEN = "$" 1365 JOIN_HINTS = False 1366 TABLE_HINTS = False 1367 QUERY_HINTS = False 1368 LIMIT_FETCH = "LIMIT" 1369 STRUCT_DELIMITER = ("(", ")") 1370 RENAME_TABLE_WITH_DB = False 1371 NVL2_SUPPORTED = False 1372 SEMI_ANTI_JOIN_WITH_SIDE = False 1373 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 1374 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 1375 LAST_DAY_SUPPORTS_DATE_PART = False 1376 JSON_KEY_VALUE_PAIR_SEP = "," 1377 IGNORE_NULLS_IN_FUNC = True 1378 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 1379 SUPPORTS_CREATE_TABLE_LIKE = False 1380 MULTI_ARG_DISTINCT = False 1381 CAN_IMPLEMENT_ARRAY_ANY = True 1382 SUPPORTS_TO_NUMBER = False 1383 SUPPORTS_WINDOW_EXCLUDE = True 1384 COPY_HAS_INTO_KEYWORD = False 1385 STAR_EXCEPT = "EXCLUDE" 1386 PAD_FILL_PATTERN_IS_REQUIRED = True 1387 ARRAY_CONCAT_IS_VAR_LEN = False 1388 ARRAY_SIZE_DIM_REQUIRED = False 1389 NORMALIZE_EXTRACT_DATE_PARTS = True 1390 SUPPORTS_LIKE_QUANTIFIERS = False 1391 SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True 1392 1393 TRANSFORMS = { 1394 **generator.Generator.TRANSFORMS, 1395 exp.AnyValue: _anyvalue_sql, 1396 exp.ApproxDistinct: approx_count_distinct_sql, 1397 exp.Boolnot: lambda self, e: f"NOT ({self.sql(e, 'this')})", 1398 exp.Array: transforms.preprocess( 1399 [transforms.inherit_struct_field_names], 1400 generator=inline_array_unless_query, 1401 ), 1402 exp.ArrayAppend: rename_func("LIST_APPEND"), 1403 exp.ArrayFilter: rename_func("LIST_FILTER"), 1404 exp.ArrayRemove: remove_from_array_using_filter, 1405 exp.ArraySort: _array_sort_sql, 1406 exp.ArrayPrepend: lambda self, e: self.func("LIST_PREPEND", e.expression, e.this), 1407 exp.ArraySum: rename_func("LIST_SUM"), 1408 exp.ArrayUniqueAgg: lambda self, e: self.func( 1409 "LIST", exp.Distinct(expressions=[e.this]) 1410 ), 1411 exp.BitwiseAnd: lambda self, 
e: self._bitwise_op(e, "&"), 1412 exp.BitwiseAndAgg: _bitwise_agg_sql, 1413 exp.BitwiseLeftShift: _bitshift_sql, 1414 exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"), 1415 exp.BitwiseOrAgg: _bitwise_agg_sql, 1416 exp.BitwiseRightShift: _bitshift_sql, 1417 exp.BitwiseXorAgg: _bitwise_agg_sql, 1418 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 1419 exp.Corr: lambda self, e: self._corr_sql(e), 1420 exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"), 1421 exp.CurrentTime: lambda *_: "CURRENT_TIME", 1422 exp.CurrentTimestamp: lambda self, e: self.sql( 1423 exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC")) 1424 ) 1425 if e.args.get("sysdate") 1426 else "CURRENT_TIMESTAMP", 1427 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1428 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1429 exp.DayOfWeekIso: rename_func("ISODOW"), 1430 exp.DayOfYear: rename_func("DAYOFYEAR"), 1431 exp.Dayname: lambda self, e: ( 1432 self.func("STRFTIME", e.this, exp.Literal.string("%a")) 1433 if e.args.get("abbreviated") 1434 else self.func("DAYNAME", e.this) 1435 ), 1436 exp.Monthname: lambda self, e: ( 1437 self.func("STRFTIME", e.this, exp.Literal.string("%b")) 1438 if e.args.get("abbreviated") 1439 else self.func("MONTHNAME", e.this) 1440 ), 1441 exp.DataType: _datatype_sql, 1442 exp.Date: _date_sql, 1443 exp.DateAdd: _date_delta_to_binary_interval_op(), 1444 exp.DateFromParts: _date_from_parts_sql, 1445 exp.DateSub: _date_delta_to_binary_interval_op(), 1446 exp.DateDiff: _date_diff_sql, 1447 exp.DateStrToDate: datestrtodate_sql, 1448 exp.Datetime: no_datetime_sql, 1449 exp.DatetimeDiff: _date_diff_sql, 1450 exp.DatetimeSub: _date_delta_to_binary_interval_op(), 1451 exp.DatetimeAdd: _date_delta_to_binary_interval_op(), 1452 exp.DateToDi: lambda self, 1453 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 1454 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 1455 exp.DiToDate: lambda 
self, 1456 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 1457 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 1458 exp.EuclideanDistance: rename_func("LIST_DISTANCE"), 1459 exp.GenerateDateArray: _generate_datetime_array_sql, 1460 exp.GenerateTimestampArray: _generate_datetime_array_sql, 1461 exp.Getbit: getbit_sql, 1462 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 1463 exp.Explode: rename_func("UNNEST"), 1464 exp.IntDiv: lambda self, e: self.binary(e, "//"), 1465 exp.IsInf: rename_func("ISINF"), 1466 exp.IsNan: rename_func("ISNAN"), 1467 exp.Ceil: _ceil_floor, 1468 exp.Floor: _ceil_floor, 1469 exp.JSONBExists: rename_func("JSON_EXISTS"), 1470 exp.JSONExtract: _arrow_json_extract_sql, 1471 exp.JSONExtractArray: _json_extract_value_array_sql, 1472 exp.JSONFormat: _json_format_sql, 1473 exp.JSONValueArray: _json_extract_value_array_sql, 1474 exp.Lateral: explode_to_unnest_sql, 1475 exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)), 1476 exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)), 1477 exp.BoolxorAgg: _boolxor_agg_sql, 1478 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 1479 exp.Initcap: _initcap_sql, 1480 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 1481 exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)), 1482 exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)), 1483 exp.MonthsBetween: months_between_sql, 1484 exp.PercentileCont: rename_func("QUANTILE_CONT"), 1485 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 1486 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
1487 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 1488 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 1489 exp.RegexpReplace: lambda self, e: self.func( 1490 "REGEXP_REPLACE", 1491 e.this, 1492 e.expression, 1493 e.args.get("replacement"), 1494 regexp_replace_global_modifier(e), 1495 ), 1496 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 1497 exp.RegexpILike: lambda self, e: self.func( 1498 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 1499 ), 1500 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 1501 exp.RegrValx: _regr_val_sql, 1502 exp.RegrValy: _regr_val_sql, 1503 exp.Return: lambda self, e: self.sql(e, "this"), 1504 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 1505 exp.Rand: rename_func("RANDOM"), 1506 exp.SHA: rename_func("SHA1"), 1507 exp.SHA2: sha256_sql, 1508 exp.Split: rename_func("STR_SPLIT"), 1509 exp.SortArray: _sort_array_sql, 1510 exp.StrPosition: strposition_sql, 1511 exp.StrToUnix: lambda self, e: self.func( 1512 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 1513 ), 1514 exp.Struct: _struct_sql, 1515 exp.Transform: rename_func("LIST_TRANSFORM"), 1516 exp.TimeAdd: _date_delta_to_binary_interval_op(), 1517 exp.TimeSub: _date_delta_to_binary_interval_op(), 1518 exp.Time: no_time_sql, 1519 exp.TimeDiff: _timediff_sql, 1520 exp.Timestamp: no_timestamp_sql, 1521 exp.TimestampAdd: _date_delta_to_binary_interval_op(), 1522 exp.TimestampDiff: lambda self, e: self.func( 1523 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 1524 ), 1525 exp.TimestampSub: _date_delta_to_binary_interval_op(), 1526 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 1527 exp.TimeStrToTime: timestrtotime_sql, 1528 exp.TimeStrToUnix: lambda self, e: self.func( 1529 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 1530 ), 1531 exp.TimeToStr: 
lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 1532 exp.ToBoolean: _to_boolean_sql, 1533 exp.TimeToUnix: rename_func("EPOCH"), 1534 exp.TsOrDiToDi: lambda self, 1535 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 1536 exp.TsOrDsAdd: _date_delta_to_binary_interval_op(), 1537 exp.TsOrDsDiff: lambda self, e: self.func( 1538 "DATE_DIFF", 1539 f"'{e.args.get('unit') or 'DAY'}'", 1540 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 1541 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 1542 ), 1543 exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)), 1544 exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)), 1545 exp.UnixSeconds: lambda self, e: self.sql( 1546 exp.cast( 1547 self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DataType.Type.BIGINT 1548 ) 1549 ), 1550 exp.UnixToStr: lambda self, e: self.func( 1551 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 1552 ), 1553 exp.DatetimeTrunc: lambda self, e: self.func( 1554 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 1555 ), 1556 exp.UnixToTime: _unix_to_time_sql, 1557 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 1558 exp.VariancePop: rename_func("VAR_POP"), 1559 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1560 exp.YearOfWeek: lambda self, e: self.sql( 1561 exp.Extract( 1562 this=exp.Var(this="ISOYEAR"), 1563 expression=e.this, 1564 ) 1565 ), 1566 exp.YearOfWeekIso: lambda self, e: self.sql( 1567 exp.Extract( 1568 this=exp.Var(this="ISOYEAR"), 1569 expression=e.this, 1570 ) 1571 ), 1572 exp.Xor: bool_xor_sql, 1573 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1574 rename_func("LEVENSHTEIN") 1575 ), 1576 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 1577 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 1578 exp.DateBin: rename_func("TIME_BUCKET"), 
1579 exp.LastDay: _last_day_sql, 1580 } 1581 1582 SUPPORTED_JSON_PATH_PARTS = { 1583 exp.JSONPathKey, 1584 exp.JSONPathRoot, 1585 exp.JSONPathSubscript, 1586 exp.JSONPathWildcard, 1587 } 1588 1589 TYPE_MAPPING = { 1590 **generator.Generator.TYPE_MAPPING, 1591 exp.DataType.Type.BINARY: "BLOB", 1592 exp.DataType.Type.BPCHAR: "TEXT", 1593 exp.DataType.Type.CHAR: "TEXT", 1594 exp.DataType.Type.DATETIME: "TIMESTAMP", 1595 exp.DataType.Type.DECFLOAT: "DECIMAL(38, 5)", 1596 exp.DataType.Type.FLOAT: "REAL", 1597 exp.DataType.Type.JSONB: "JSON", 1598 exp.DataType.Type.NCHAR: "TEXT", 1599 exp.DataType.Type.NVARCHAR: "TEXT", 1600 exp.DataType.Type.UINT: "UINTEGER", 1601 exp.DataType.Type.VARBINARY: "BLOB", 1602 exp.DataType.Type.ROWVERSION: "BLOB", 1603 exp.DataType.Type.VARCHAR: "TEXT", 1604 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMPTZ", 1605 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 1606 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 1607 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 1608 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 1609 exp.DataType.Type.BIGDECIMAL: "DECIMAL(38, 5)", 1610 } 1611 1612 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 1613 RESERVED_KEYWORDS = { 1614 "array", 1615 "analyse", 1616 "union", 1617 "all", 1618 "when", 1619 "in_p", 1620 "default", 1621 "create_p", 1622 "window", 1623 "asymmetric", 1624 "to", 1625 "else", 1626 "localtime", 1627 "from", 1628 "end_p", 1629 "select", 1630 "current_date", 1631 "foreign", 1632 "with", 1633 "grant", 1634 "session_user", 1635 "or", 1636 "except", 1637 "references", 1638 "fetch", 1639 "limit", 1640 "group_p", 1641 "leading", 1642 "into", 1643 "collate", 1644 "offset", 1645 "do", 1646 "then", 1647 "localtimestamp", 1648 "check_p", 1649 "lateral_p", 1650 "current_role", 1651 "where", 1652 "asc_p", 1653 "placing", 1654 "desc_p", 1655 "user", 1656 "unique", 1657 "initially", 1658 "column", 1659 "both", 
1660 "some", 1661 "as", 1662 "any", 1663 "only", 1664 "deferrable", 1665 "null_p", 1666 "current_time", 1667 "true_p", 1668 "table", 1669 "case", 1670 "trailing", 1671 "variadic", 1672 "for", 1673 "on", 1674 "distinct", 1675 "false_p", 1676 "not", 1677 "constraint", 1678 "current_timestamp", 1679 "returning", 1680 "primary", 1681 "intersect", 1682 "having", 1683 "analyze", 1684 "current_user", 1685 "and", 1686 "cast", 1687 "symmetric", 1688 "using", 1689 "order", 1690 "current_catalog", 1691 } 1692 1693 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 1694 1695 # DuckDB doesn't generally support CREATE TABLE .. properties 1696 # https://duckdb.org/docs/sql/statements/create_table.html 1697 PROPERTIES_LOCATION = { 1698 prop: exp.Properties.Location.UNSUPPORTED 1699 for prop in generator.Generator.PROPERTIES_LOCATION 1700 } 1701 1702 # There are a few exceptions (e.g. temporary tables) which are supported or 1703 # can be transpiled to DuckDB, so we explicitly override them accordingly 1704 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 1705 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 1706 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 1707 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 1708 1709 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 1710 exp.FirstValue, 1711 exp.Lag, 1712 exp.LastValue, 1713 exp.Lead, 1714 exp.NthValue, 1715 ) 1716 1717 # Template for ZIPF transpilation - placeholders get replaced with actual parameters 1718 ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse( 1719 """ 1720 WITH rand AS (SELECT :random_expr AS r), 1721 weights AS ( 1722 SELECT i, 1.0 / POWER(i, :s) AS w 1723 FROM RANGE(1, :n + 1) AS t(i) 1724 ), 1725 cdf AS ( 1726 SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p 1727 FROM weights 1728 ) 1729 SELECT MIN(i) 1730 FROM cdf 1731 WHERE p >= (SELECT r FROM rand) 1732 """ 1733 ) 1734 1735 # 
# Template for NORMAL transpilation using Box-Muller transform
# mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
NORMAL_TEMPLATE: exp.Expression = exp.maybe_parse(
    ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
)

# Template for generating a seeded pseudo-random value in [0, 1) from a hash
SEEDED_RANDOM_TEMPLATE: exp.Expression = exp.maybe_parse(
    "(ABS(HASH(:seed)) % 1000000) / 1000000.0"
)

# Template for RANDSTR transpilation - placeholders get replaced with actual parameters.
# Picks one of the 62 characters in RANDSTR_CHAR_POOL per generated index i, using a
# hash of (i + seed) as the per-character pseudo-random value, and concatenates them.
RANDSTR_TEMPLATE: exp.Expression = exp.maybe_parse(
    f"""
    SELECT LISTAGG(
        SUBSTRING(
            '{RANDSTR_CHAR_POOL}',
            1 + CAST(FLOOR(random_value * 62) AS INT),
            1
        ),
        ''
    )
    FROM (
        SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
        FROM RANGE(:length) AS t(i)
    )
    """,
)

def bitmapbucketnumber_sql(
    self: DuckDB.Generator, expression: exp.BitmapBucketNumber
) -> str:
    """
    Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

    Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
    - Each bucket covers 32,768 values
    - Bucket numbering starts at 1
    - Formula: ((value - 1) // 32768) + 1 for positive values

    For non-positive values (0 and negative), we use value // 32768 to avoid
    producing bucket 0 or positive bucket IDs for negative inputs.
    """
    value = expression.this

    # Python operators on sqlglot expressions build AST nodes (// -> integer div)
    positive_formula = ((value - 1) // 32768) + 1
    non_positive_formula = value // 32768

    # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
    case_expr = (
        exp.case()
        .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
        .else_(non_positive_formula)
    )
    return self.sql(case_expr)

def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
    """
    Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

    Snowflake's BITMAP_BIT_POSITION behavior:
    - For n <= 0: returns ABS(n) % 32768
    - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
    """
    this = expression.this

    # (CASE-like IF(n > 0, n - 1, ABS(n))) % MAX_BIT_POSITION, parenthesized so the
    # modulo binds to the whole conditional
    return self.sql(
        exp.Mod(
            this=exp.Paren(
                this=exp.If(
                    this=exp.GT(this=this, expression=exp.Literal.number(0)),
                    true=this - exp.Literal.number(1),
                    false=exp.Abs(this=this),
                )
            ),
            expression=MAX_BIT_POSITION,
        )
    )

def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
    """
    Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    RANDSTR(length, generator) generates a random string of specified length.
    - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
    - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
    - No generator: Use default seed value
    """
    length = expression.this
    generator = expression.args.get("generator")

    if generator:
        if isinstance(generator, exp.Rand):
            # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
            seed_value = generator.this or generator
        else:
            # Const/int or other expression - use as seed directly
            seed_value = generator
    else:
        # No generator specified, use default seed (arbitrary but deterministic)
        seed_value = exp.Literal.number(RANDSTR_SEED)

    replacements = {"seed": seed_value, "length": length}
    # Wrap in parens: the template is a full SELECT, used here as a scalar subquery
    return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"

def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
    """
    Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
    Uses a pre-parsed template with placeholders replaced by expression nodes.
    """
    s = expression.this
    n = expression.args["elementcount"]
    gen = expression.args["gen"]

    if not isinstance(gen, exp.Rand):
        # Seeded/deterministic draw: (ABS(HASH(seed)) % 1000000) / 1000000.0
        random_expr: exp.Expression = exp.Div(
            this=exp.Paren(
                this=exp.Mod(
                    this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
                    expression=exp.Literal.number(1000000),
                )
            ),
            expression=exp.Literal.number(1000000.0),
        )
    else:
        # Use RANDOM() for non-deterministic output
        random_expr = exp.Rand()

    replacements = {"s": s, "n": n, "random_expr": random_expr}
    return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"

def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
    """
    TO_BINARY and TRY_TO_BINARY transpilation:
    - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
    - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
    - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')

    For TRY_TO_BINARY (safe=True), wrap with TRY():
    - 'HEX': TRY_TO_BINARY('invalid', 'HEX') → TRY(UNHEX('invalid'))
    """
    value = expression.this
    format_arg = expression.args.get("format")
    is_safe = expression.args.get("safe")

    # Snowflake defaults to HEX when no format argument is given
    fmt = "HEX"
    if format_arg:
        fmt = format_arg.name.upper()

    if expression.is_type(exp.DataType.Type.BINARY):
        if fmt == "UTF-8":
            result = self.func("ENCODE", value)
        elif fmt == "BASE64":
            result = self.func("FROM_BASE64", value)
        elif fmt == "HEX":
            result = self.func("UNHEX", value)
        else:
            if is_safe:
                # TRY_TO_BINARY with an unknown format yields NULL rather than erroring
                return self.sql(exp.null())
            else:
                self.unsupported(f"format {fmt} is not supported")
                result = self.func("TO_BINARY", value)

        # Wrap with TRY() for TRY_TO_BINARY
        if is_safe:
            result = self.func("TRY", result)

        return result

    # Fallback, which needs to be
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expression = expression.args["gen"]

        # Build two uniform random values [0, 1) for Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            # Unseeded RANDOM(): two independent non-deterministic draws
            u1: exp.Expression = exp.Rand()
            u2: exp.Expression = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            # (seed and seed + 1) so u1 and u2 are distinct but reproducible
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))

    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        gen = expression.args.get("gen")

        # Determine if result should be integer (both bounds are integers).
        # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            # NOTE(review): assumes `gen` is always present when it's not RANDOM();
            # a missing "gen" arg would put None inside HASH(...) — confirm callers.
            random_expr: exp.Expression = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        range_expr: exp.Expression = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            # +1 makes the integer upper bound inclusive after FLOOR below
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expression = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        if is_int_result:
            result = exp.Cast(
                this=exp.Floor(this=result),
                to=exp.DataType.build("BIGINT"),
            )

        return self.sql(result)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        # DuckDB's MAKE_TIME has no nanosecond argument, so nanos are folded
        # into the fractional seconds argument (and popped from the AST).
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        # Fold milli/nano components into fractional seconds, since DuckDB's
        # MAKE_TIMESTAMP only accepts a (fractional) seconds argument.
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            # Discrete row counts require reservoir sampling in DuckDB
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        # UDF parameters are rendered as bare names (no type/constraints)
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            not expression.args.get("using")
            and not expression.args.get("on")
            and not expression.method
            and (expression.kind in ("", "INNER", "OUTER"))
        ):
            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            if isinstance(expression.this, exp.Unnest):
                return super().join_sql(expression.on(exp.true()))

            # Otherwise degrade to a plain (cross-style) JOIN by dropping side/kind
            expression.set("side", None)
            expression.set("kind", None)

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def countif_sql(self, expression: exp.CountIf) -> str:
        # COUNT_IF is natively available starting with DuckDB 1.2
        if self.dialect.version >= (1, 2):
            return self.function_fallback_sql(expression)

        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DataType.Type.MAP):
                # Pre-1.2 DuckDB returns a single-element list for MAP lookups;
                # index into it to surface the scalar value
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Splice the WITHIN GROUP payload inside the function's parentheses
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        # Runtime type dispatch: OCTET_LENGTH for blobs, LENGTH otherwise
        case = (
            exp.case(self.func("TYPEOF", arg))
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            .else_(
                exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
        )

        return self.sql(case)

    def lower_sql(self, expression: exp.Lower) -> str:
        # Cast blob inputs to VARCHAR first, then cast the result back if needed
        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def upper_sql(self, expression: exp.Upper) -> str:
        # Mirrors lower_sql: VARCHAR in, optionally BLOB back out
        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def replace_sql(self, expression: exp.Replace) -> str:
        # All three arguments are coerced to VARCHAR for DuckDB's REPLACE
        result_sql = self.func(
            "REPLACE",
            _cast_to_varchar(expression.this),
            _cast_to_varchar(expression.expression),
            _cast_to_varchar(expression.args.get("replacement")),
        )
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
        # Shared helper for binary bitwise operators: normalize operand types,
        # emit the operator, and restore a BLOB result when required
        _prepare_binary_bitwise_args(expression)
        result_sql = self.binary(expression, op)
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
        # DuckDB spells bitwise XOR as the XOR() function, not an operator
        _prepare_binary_bitwise_args(expression)
        result_sql = self.func("XOR", expression.this, expression.expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g.
# (continuation of ignorenulls_sql)
            return super().ignorenulls_sql(expression)

        # BigQuery's FIRST(... IGNORE NULLS) maps to DuckDB's ANY_VALUE, which
        # skips NULLs by definition
        if isinstance(this, exp.First):
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        # Drop the IGNORE NULLS wrapper and render the inner function
        return self.sql(this)

    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render RESPECT NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
            return super().respectnulls_sql(expression)

        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        # Emulate the optional NULL-replacement argument by COALESCE-ing each element
        if null_text:
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # A start position > 1 is emulated by slicing the haystack first
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            # Some dialects return NULL (rather than '') when position exceeds
            # the string length; NULLIF('') reproduces that
            if null_if_pos_overflow:
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        # Do not render group if there is no following argument,
        # and it's the default value for this dialect
        if (
            not params
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
            # nth occurrence: extract all matches, then index into the list.
            # NOTE(review): `occurrence` is already an Expression here, so
            # exp.Literal.number(occurrence) relies on its SQL stringification —
            # confirm this is intended rather than passing `occurrence` directly.
            return self.func(
                "ARRAY_EXTRACT",
                self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
                exp.Literal.number(occurrence),
            )

        return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)

    @unsupported_args("culture")
    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
        # Only an integer "format" (number of decimal places) can be mapped to
        # DuckDB's fmt-style FORMAT('{:,.Nf}', x)
        fmt = expression.args.get("format")
        if fmt and fmt.is_int:
            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)

    def autoincrementcolumnconstraint_sql(self, _) -> str:
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""

    def aliases_sql(self, expression: exp.Aliases) -> str:
        # POSEXPLODE aliased with multiple names needs its dedicated handler
        this = expression.this
        if isinstance(this, exp.Posexplode):
            return self.posexplode_sql(this)

        return super().aliases_sql(expression)

    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql

    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        # String inputs must be coerced to a temporal type before interval math
        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DataType.Type.FLOAT,
                exp.DataType.Type.DOUBLE,
                exp.DataType.Type.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)

    def format_sql(self, expression: exp.Format) -> str:
        # Only the trivial single-'%s' template maps cleanly onto DuckDB's
        # fmt-style FORMAT('{}', arg); anything else falls back
        if expression.name.lower() == "%s" and len(expression.expressions) == 1:
            return self.func("FORMAT", "'{}'", expression.expressions[0])

        return self.function_fallback_sql(expression)

    def hexstring_sql(
        self, expression: exp.HexString, binary_function_repr: t.Optional[str] = None
    ) -> str:
        # UNHEX('FF') correctly produces blob \xFF in DuckDB
        return super().hexstring_sql(expression, binary_function_repr="UNHEX")

    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        unit = unit_to_str(expression)
        date = expression.this
        result = self.func("DATE_TRUNC", unit, date)

        # Some source dialects guarantee DATE_TRUNC returns the input's type;
        # cast back when that contract must be honored
        if expression.args.get("input_type_preserved"):
            if not date.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                date = annotate_types(date, dialect=self.dialect)

            if date.type and date.is_type(*exp.DataType.TEMPORAL_TYPES):
                return self.sql(exp.Cast(this=result, to=date.type))
        return result

    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this

        if is_date_unit(unit) and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            if not timestamp.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                timestamp = annotate_types(timestamp, dialect=self.dialect)

            if timestamp.type and timestamp.is_type(
                exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ
            ):
                # DuckDB can't DATE_TRUNC a bare TIME: anchor it to an arbitrary
                # date, truncate, then cast back to the original TIME type
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DataType.Type.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            if timestamp.type and timestamp.is_type(*exp.DataType.TEMPORAL_TYPES):
                return self.sql(exp.Cast(this=result, to=timestamp.type))
        return result

    def trim_sql(self, expression: exp.Trim) -> str:
        # Coerce both the subject and the optional trim characters to VARCHAR
        # (in place), then restore a BLOB result when the input was binary
        expression.this.replace(_cast_to_varchar(expression.this))
        if expression.expression:
            expression.expression.replace(_cast_to_varchar(expression.expression))

        result_sql = super().trim_sql(expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)

    def round_sql(self, expression: exp.Round) -> str:
        this = expression.this
        decimals = expression.args.get("decimals")
        truncate = expression.args.get("truncate")

        # DuckDB requires the scale (decimals) argument to be an INT
        # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
        if decimals is not None and expression.args.get("casts_non_integer_decimals"):
            if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
                decimals = exp.cast(decimals, exp.DataType.Type.INT)

        func = "ROUND"
        if truncate:
            # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
            if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
                func = "ROUND_EVEN"
                truncate = None
            # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
            elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
                # DuckDB's default ROUND already rounds half away from zero
                truncate = None

        return self.func(func, this, decimals, truncate)

    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
        """
        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
        dividing the input distribution into n equal-sized buckets.

        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
        does not document the specific algorithm used so results may differ. DuckDB does not
        support RESPECT NULLS.
        """
        this = expression.this
        if isinstance(this, exp.Distinct):
            # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
            if len(this.expressions) < 2:
                self.unsupported("APPROX_QUANTILES requires a bucket count argument")
                return self.function_fallback_sql(expression)
            num_quantiles_expr = this.expressions[1].pop()
        else:
            num_quantiles_expr = expression.expression

        if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        num_quantiles = t.cast(int, num_quantiles_expr.to_py())
        if num_quantiles <= 0:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        # Decimal keeps the quantile fractions exact (e.g. 1/3 stays precise enough
        # for literal rendering instead of accumulating float error)
        quantiles = [
            exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
            for i in range(num_quantiles + 1)
        ]

        return self.sql(
            exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))
        )

    def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
        # scalar_only: first JSON_VALUE(...) (NULL for non-scalars), then re-extract
        # at '$' so the result is rendered through the arrow operator path
        if expression.args.get("scalar_only"):
            expression = exp.JSONExtractScalar(
                this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
            )
        return _arrow_json_extract_sql(self, expression)

    def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
        this = expression.this

        # Binary inputs keep a BINARY result type so the output is cast back to BLOB
        if _is_binary(this):
            expression.type = exp.DataType.build("BINARY")

        arg = _cast_to_bit(this)

        # Parenthesize negative literals so ~-x renders unambiguously
        if isinstance(this, exp.Neg):
            arg = exp.Paren(this=arg)

        expression.set("this", arg)

        result_sql = f"~{self.sql(expression, 'this')}"

        return _gen_with_cast_to_blob(self, expression, result_sql)

    def window_sql(self, expression: exp.Window) -> str:
        this = expression.this
        if isinstance(this, exp.Corr)
or ( 2705 isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr) 2706 ): 2707 return self._corr_sql(expression) 2708 2709 return super().window_sql(expression) 2710 2711 def filter_sql(self, expression: exp.Filter) -> str: 2712 if isinstance(expression.this, exp.Corr): 2713 return self._corr_sql(expression) 2714 2715 return super().filter_sql(expression) 2716 2717 def _corr_sql( 2718 self, 2719 expression: t.Union[exp.Filter, exp.Window, exp.Corr], 2720 ) -> str: 2721 if isinstance(expression, exp.Corr) and not expression.args.get( 2722 "null_on_zero_variance" 2723 ): 2724 return self.func("CORR", expression.this, expression.expression) 2725 2726 corr_expr = _maybe_corr_null_to_false(expression) 2727 if corr_expr is None: 2728 if isinstance(expression, exp.Window): 2729 return super().window_sql(expression) 2730 if isinstance(expression, exp.Filter): 2731 return super().filter_sql(expression) 2732 corr_expr = expression # make mypy happy 2733 2734 return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Convert *path* to a JSON path expression, leaving DuckDB-only syntaxes untouched.

    DuckDB also supports the JSON pointer syntax, where every path starts with a `/`,
    and allows accessing the back of lists using the `[#-i]` syntax. Such literals are
    returned as-is so we avoid trying to parse them as JSON paths, which could either
    produce a noisy warning or an invalid representation of the path.
    """
    if isinstance(path, exp.Literal):
        text = path.name
        is_json_pointer = text.startswith("/")
        has_back_index = "[#" in text
        if is_json_pointer or has_back_index:
            return path

    return super().to_json_path(path)
class Tokenizer(tokens.Tokenizer):
    """Tokenizer overrides for DuckDB's lexical quirks."""

    # e'...' strings are byte strings; $tag$...$tag$ heredocs are supported
    BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    # A lone `$` that doesn't open a heredoc is treated as a parameter marker
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # DuckDB-specific operators
        "//": TokenType.DIV,
        "**": TokenType.DSTAR,
        "^@": TokenType.CARET_AT,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "ATTACH": TokenType.ATTACH,
        # Type aliases mapped onto their canonical token types
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "DATETIME": TokenType.TIMESTAMPNTZ,
        "DETACH": TokenType.DETACH,
        "FORCE": TokenType.FORCE,
        "INSTALL": TokenType.INSTALL,
        "INT8": TokenType.BIGINT,
        "LOGICAL": TokenType.BOOLEAN,
        "MACRO": TokenType.FUNCTION,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "RESET": TokenType.COMMAND,
        "ROW": TokenType.STRUCT,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        # DuckDB TIMESTAMP has no time zone; precision variants keep their own tokens
        "TIMESTAMP": TokenType.TIMESTAMPNTZ,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        # Unsigned integer types
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    # DuckDB has no /*+ ... */ hint syntax
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    # SHOW is parsed as a statement here, not treated as an opaque command
    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser overrides for DuckDB syntax and function-name mappings."""

    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

    # `^` is exponentiation in DuckDB, so remove it from the bitwise operators
    BITWISE = parser.Parser.BITWISE.copy()
    BITWISE.pop(TokenType.CARET)

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
        TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        TokenType.TILDA: binary_range_parser(exp.RegexpFullMatch),
    }

    # Both `^` and `**` parse as power operators
    EXPONENT = {
        **parser.Parser.EXPONENT,
        TokenType.CARET: exp.Pow,
        TokenType.DSTAR: exp.Pow,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    SHOW_PARSERS = {
        "TABLES": _show_parser("TABLES"),
        "ALL TABLES": _show_parser("ALL TABLES"),
    }

    # Maps DuckDB function names to sqlglot expression builders
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
        "ARRAY_PREPEND": _build_array_prepend,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
        "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
        "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EDITDIST3": exp.Levenshtein.from_arg_list,
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "GENERATE_SERIES": _build_generate_series(),
        "GET_BIT": lambda args: exp.Getbit(
            this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_APPEND": exp.ArrayAppend.from_arg_list,
        "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
        "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
        "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
        "LIST_FILTER": exp.ArrayFilter.from_arg_list,
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
        "LIST_PREPEND": _build_array_prepend,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_TRANSFORM": exp.Transform.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_DATE": exp.DateFromParts.from_arg_list,
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        # RANGE is like GENERATE_SERIES but with an exclusive upper bound
        "RANGE": _build_generate_series(end_exclusive=True),
        "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
        "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
            # DuckDB replaces only the first match unless the 'g' modifier is given
            single_replace=True,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TIME_BUCKET": exp.DateBin.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
    }

    # These names don't exist (or mean something else) in DuckDB
    FUNCTIONS.pop("DATE_SUB")
    FUNCTIONS.pop("GLOB")

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        **dict.fromkeys(
            ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
        ),
    }
    # DECODE is handled via FUNCTIONS above, not a special-cased parser
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
        # Prefix `@expr` is DuckDB shorthand for ABS(expr)
        "@": lambda self: exp.Abs(this=self._parse_bitwise()),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        # $1 / $name style placeholders
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ATTACH: lambda self: self._parse_attach_detach(),
        TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
        TokenType.FORCE: lambda self: self._parse_force(),
        TokenType.INSTALL: lambda self: self._parse_install(),
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    SET_PARSERS = {
        **parser.Parser.SET_PARSERS,
        "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
    }

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse DuckDB's `LAMBDA x: expr` form, falling back to the default syntax."""
        index = self._index
        if not self._match_text_seq("LAMBDA"):
            return super()._parse_lambda(alias=alias)

        expressions = self._parse_csv(self._parse_lambda_arg)
        if not self._match(TokenType.COLON):
            # Not actually a lambda — rewind the cursor to before LAMBDA
            self._retreat(index)
            return None

        this = self._replace_lambda(self._parse_assignment(), expressions)
        return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # DuckDB supports prefix aliases, e.g. foo: 1
        if self._next and self._next.token_type == TokenType.COLON:
            alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
            self._match(TokenType.COLON)
            comments = self._prev_comments or []

            this = self._parse_assignment()
            if isinstance(this, exp.Expression):
                # Moves the comment next to the alias in `alias: expr /* comment */`
                comments += this.pop_comments() or []

            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return super()._parse_expression()

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        # DuckDB supports prefix aliases, e.g. FROM foo: bar
        if self._next and self._next.token_type == TokenType.COLON:
            alias = self._parse_table_alias(
                alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
            )
            self._match(TokenType.COLON)
            comments = self._prev_comments or []
        else:
            alias = None
            comments = []

        table = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
            parse_partition=parse_partition,
        )
        if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
            # Moves the comment next to the alias in `alias: table /* comment */`
            comments += table.pop_comments() or []
            alias.comments = alias.pop_comments() + comments
            table.set("alias", alias)

        return table

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            # DuckDB defaults: RESERVOIR when a row count is given, SYSTEM otherwise
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if self.dialect.version < (1, 2) and isinstance(bracket, exp.Bracket):
            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        """Parse MAP {…} literal syntax or MAP(keys, values) constructor syntax."""
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")

    def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
        """Parse ATTACH/DETACH [DATABASE] [IF (NOT) EXISTS] name [AS alias] [(options)]."""

        def _parse_attach_option() -> exp.AttachOption:
            return self.expression(
                exp.AttachOption,
                this=self._parse_var(any_token=True),
                expression=self._parse_field(any_token=True),
            )

        self._match(TokenType.DATABASE)
        exists = self._parse_exists(not_=is_attach)
        this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(_parse_attach_option)
        else:
            expressions = None

        return (
            self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
            if is_attach
            else self.expression(exp.Detach, this=this, exists=exists)
        )

    def _parse_show_duckdb(self, this: str) -> exp.Show:
        return self.expression(exp.Show, this=this)

    def _parse_force(self) -> exp.Install | exp.Command:
        # FORCE can only be followed by INSTALL or CHECKPOINT
        # In the case of CHECKPOINT, we fallback
        if not self._match(TokenType.INSTALL):
            return self._parse_as_command(self._prev)

        return self._parse_install(force=True)

    def _parse_install(self, force: bool = False) -> exp.Install:
        return self.expression(
            exp.Install,
            this=self._parse_id_var(),
            from_=self._parse_var_or_string() if self._match(TokenType.FROM) else None,
            force=force,
        )

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # #N refers to the N-th output column, e.g. in pipe syntax
        if self._match_pair(TokenType.HASH, TokenType.NUMBER):
            return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

        return super()._parse_primary()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1362 class Generator(generator.Generator): 1363 PARAMETER_TOKEN = "$" 1364 NAMED_PLACEHOLDER_TOKEN = "$" 1365 JOIN_HINTS = False 1366 TABLE_HINTS = False 1367 QUERY_HINTS = False 1368 LIMIT_FETCH = "LIMIT" 1369 STRUCT_DELIMITER = ("(", ")") 1370 RENAME_TABLE_WITH_DB = False 1371 NVL2_SUPPORTED = False 1372 SEMI_ANTI_JOIN_WITH_SIDE = False 1373 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 1374 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 1375 LAST_DAY_SUPPORTS_DATE_PART = False 1376 JSON_KEY_VALUE_PAIR_SEP = "," 1377 IGNORE_NULLS_IN_FUNC = True 1378 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 1379 SUPPORTS_CREATE_TABLE_LIKE = False 1380 MULTI_ARG_DISTINCT = False 1381 CAN_IMPLEMENT_ARRAY_ANY = True 1382 SUPPORTS_TO_NUMBER = False 1383 SUPPORTS_WINDOW_EXCLUDE = True 1384 COPY_HAS_INTO_KEYWORD = False 1385 STAR_EXCEPT = "EXCLUDE" 1386 PAD_FILL_PATTERN_IS_REQUIRED = True 1387 ARRAY_CONCAT_IS_VAR_LEN = False 1388 ARRAY_SIZE_DIM_REQUIRED = False 1389 NORMALIZE_EXTRACT_DATE_PARTS = True 1390 SUPPORTS_LIKE_QUANTIFIERS = False 1391 SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True 1392 1393 TRANSFORMS = { 1394 **generator.Generator.TRANSFORMS, 1395 exp.AnyValue: _anyvalue_sql, 1396 exp.ApproxDistinct: approx_count_distinct_sql, 1397 exp.Boolnot: lambda self, e: f"NOT ({self.sql(e, 'this')})", 1398 exp.Array: transforms.preprocess( 1399 [transforms.inherit_struct_field_names], 1400 generator=inline_array_unless_query, 1401 ), 1402 exp.ArrayAppend: rename_func("LIST_APPEND"), 1403 exp.ArrayFilter: rename_func("LIST_FILTER"), 1404 exp.ArrayRemove: remove_from_array_using_filter, 1405 exp.ArraySort: _array_sort_sql, 1406 exp.ArrayPrepend: lambda self, e: self.func("LIST_PREPEND", e.expression, e.this), 1407 exp.ArraySum: rename_func("LIST_SUM"), 1408 exp.ArrayUniqueAgg: lambda self, e: self.func( 1409 "LIST", exp.Distinct(expressions=[e.this]) 1410 ), 1411 exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"), 1412 exp.BitwiseAndAgg: _bitwise_agg_sql, 1413 exp.BitwiseLeftShift: 
_bitshift_sql, 1414 exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"), 1415 exp.BitwiseOrAgg: _bitwise_agg_sql, 1416 exp.BitwiseRightShift: _bitshift_sql, 1417 exp.BitwiseXorAgg: _bitwise_agg_sql, 1418 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 1419 exp.Corr: lambda self, e: self._corr_sql(e), 1420 exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"), 1421 exp.CurrentTime: lambda *_: "CURRENT_TIME", 1422 exp.CurrentTimestamp: lambda self, e: self.sql( 1423 exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC")) 1424 ) 1425 if e.args.get("sysdate") 1426 else "CURRENT_TIMESTAMP", 1427 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1428 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1429 exp.DayOfWeekIso: rename_func("ISODOW"), 1430 exp.DayOfYear: rename_func("DAYOFYEAR"), 1431 exp.Dayname: lambda self, e: ( 1432 self.func("STRFTIME", e.this, exp.Literal.string("%a")) 1433 if e.args.get("abbreviated") 1434 else self.func("DAYNAME", e.this) 1435 ), 1436 exp.Monthname: lambda self, e: ( 1437 self.func("STRFTIME", e.this, exp.Literal.string("%b")) 1438 if e.args.get("abbreviated") 1439 else self.func("MONTHNAME", e.this) 1440 ), 1441 exp.DataType: _datatype_sql, 1442 exp.Date: _date_sql, 1443 exp.DateAdd: _date_delta_to_binary_interval_op(), 1444 exp.DateFromParts: _date_from_parts_sql, 1445 exp.DateSub: _date_delta_to_binary_interval_op(), 1446 exp.DateDiff: _date_diff_sql, 1447 exp.DateStrToDate: datestrtodate_sql, 1448 exp.Datetime: no_datetime_sql, 1449 exp.DatetimeDiff: _date_diff_sql, 1450 exp.DatetimeSub: _date_delta_to_binary_interval_op(), 1451 exp.DatetimeAdd: _date_delta_to_binary_interval_op(), 1452 exp.DateToDi: lambda self, 1453 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 1454 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 1455 exp.DiToDate: lambda self, 1456 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS 
DATE)", 1457 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 1458 exp.EuclideanDistance: rename_func("LIST_DISTANCE"), 1459 exp.GenerateDateArray: _generate_datetime_array_sql, 1460 exp.GenerateTimestampArray: _generate_datetime_array_sql, 1461 exp.Getbit: getbit_sql, 1462 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 1463 exp.Explode: rename_func("UNNEST"), 1464 exp.IntDiv: lambda self, e: self.binary(e, "//"), 1465 exp.IsInf: rename_func("ISINF"), 1466 exp.IsNan: rename_func("ISNAN"), 1467 exp.Ceil: _ceil_floor, 1468 exp.Floor: _ceil_floor, 1469 exp.JSONBExists: rename_func("JSON_EXISTS"), 1470 exp.JSONExtract: _arrow_json_extract_sql, 1471 exp.JSONExtractArray: _json_extract_value_array_sql, 1472 exp.JSONFormat: _json_format_sql, 1473 exp.JSONValueArray: _json_extract_value_array_sql, 1474 exp.Lateral: explode_to_unnest_sql, 1475 exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)), 1476 exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)), 1477 exp.BoolxorAgg: _boolxor_agg_sql, 1478 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 1479 exp.Initcap: _initcap_sql, 1480 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 1481 exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)), 1482 exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)), 1483 exp.MonthsBetween: months_between_sql, 1484 exp.PercentileCont: rename_func("QUANTILE_CONT"), 1485 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 1486 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                regexp_replace_global_modifier(e),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.RegrValx: _regr_val_sql,
            exp.RegrValy: _regr_val_sql,
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_to_binary_interval_op(),
            exp.TimeSub: _date_delta_to_binary_interval_op(),
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_to_binary_interval_op(),
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampSub: _date_delta_to_binary_interval_op(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.ToBoolean: _to_boolean_sql,
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
            exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
            exp.UnixSeconds: lambda self, e: self.sql(
                exp.cast(
                    self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DataType.Type.BIGINT
                )
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            # Both ISO and non-ISO variants map to EXTRACT(ISOYEAR ...) here
            exp.YearOfWeek: lambda self, e: self.sql(
                exp.Extract(
                    this=exp.Var(this="ISOYEAR"),
                    expression=e.this,
                )
            ),
            exp.YearOfWeekIso: lambda self, e: self.sql(
                exp.Extract(
                    this=exp.Var(this="ISOYEAR"),
                    expression=e.this,
                )
            ),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
            exp.LastDay: _last_day_sql,
        }

        # JSON path node types that DuckDB's JSON extraction syntax can express
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DECFLOAT: "DECIMAL(38, 5)",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMPTZ",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
            exp.DataType.Type.BIGDECIMAL: "DECIMAL(38, 5)",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        # Interval values that can be rendered without wrapping quotes/parens
        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        # Window functions for which DuckDB accepts IGNORE/RESPECT NULLS
        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        # Template for ZIPF transpilation - placeholders get replaced with actual parameters
        ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse(
            """
            WITH rand AS (SELECT :random_expr AS r),
            weights AS (
                SELECT i, 1.0 / POWER(i, :s) AS w
                FROM RANGE(1, :n + 1) AS t(i)
            ),
            cdf AS (
                SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
                FROM weights
            )
            SELECT MIN(i)
            FROM cdf
            WHERE p >= (SELECT r FROM rand)
            """
        )

        # Template for NORMAL transpilation using Box-Muller transform
        # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
        NORMAL_TEMPLATE: exp.Expression = exp.maybe_parse(
            ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
        )

        # Template for generating a seeded pseudo-random value in [0, 1) from a hash
        SEEDED_RANDOM_TEMPLATE: exp.Expression = exp.maybe_parse(
            "(ABS(HASH(:seed)) % 1000000) / 1000000.0"
        )

        # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
        RANDSTR_TEMPLATE: exp.Expression = exp.maybe_parse(
            f"""
            SELECT LISTAGG(
                SUBSTRING(
                    '{RANDSTR_CHAR_POOL}',
                    1 + CAST(FLOOR(random_value * 62) AS INT),
                    1
                ),
                ''
            )
            FROM (
                SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
                FROM RANGE(:length) AS t(i)
            )
            """,
        )

        def bitmapbucketnumber_sql(
            self: DuckDB.Generator, expression: exp.BitmapBucketNumber
        ) -> str:
            """
            Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

            Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
            - Each bucket covers 32,768 values
            - Bucket numbering starts at 1
            - Formula: ((value - 1) // 32768) + 1 for positive values

            For non-positive values (0 and negative), we use value // 32768 to avoid
            producing bucket 0 or positive bucket IDs for negative inputs.
            """
            value = expression.this

            positive_formula = ((value - 1) // 32768) + 1
            non_positive_formula = value // 32768

            # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
            case_expr = (
                exp.case()
                .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
                .else_(non_positive_formula)
            )
            return self.sql(case_expr)

        def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
            """
            Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

            Snowflake's BITMAP_BIT_POSITION behavior:
            - For n <= 0: returns ABS(n) % 32768
            - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
            """
            this = expression.this

            # (IF(n > 0, n - 1, ABS(n))) % MAX_BIT_POSITION
            return self.sql(
                exp.Mod(
                    this=exp.Paren(
                        this=exp.If(
                            this=exp.GT(this=this, expression=exp.Literal.number(0)),
                            true=this - exp.Literal.number(1),
                            false=exp.Abs(this=this),
                        )
                    ),
                    expression=MAX_BIT_POSITION,
                )
            )

        def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
            """
            Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
            Uses a pre-parsed template with placeholders replaced by expression nodes.

            RANDSTR(length, generator) generates a random string of specified length.
            - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
            - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
            - No generator: Use default seed value
            """
            length = expression.this
            generator = expression.args.get("generator")

            if generator:
                if isinstance(generator, exp.Rand):
                    # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
                    seed_value = generator.this or generator
                else:
                    # Const/int or other expression - use as seed directly
                    seed_value = generator
            else:
                # No generator specified, use default seed (arbitrary but deterministic)
                seed_value = exp.Literal.number(RANDSTR_SEED)

            replacements = {"seed": seed_value, "length": length}
            # Parenthesized so the scalar subquery can be embedded in any expression position
            return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"

        def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
            """
            Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
            Uses a pre-parsed template with placeholders replaced by expression nodes.
            """
            s = expression.this
            n = expression.args["elementcount"]
            gen = expression.args["gen"]

            if not isinstance(gen, exp.Rand):
                # Seeded case: (ABS(HASH(seed)) % 1000000) / 1000000.0
                random_expr: exp.Expression = exp.Div(
                    this=exp.Paren(
                        this=exp.Mod(
                            this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
                            expression=exp.Literal.number(1000000),
                        )
                    ),
                    expression=exp.Literal.number(1000000.0),
                )
            else:
                # Use RANDOM() for non-deterministic output
                random_expr = exp.Rand()

            replacements = {"s": s, "n": n, "random_expr": random_expr}
            return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"

        def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
            """
            TO_BINARY and TRY_TO_BINARY transpilation:
            - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
            - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
            - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')

            For TRY_TO_BINARY (safe=True), wrap with TRY():
            - 'HEX': TRY_TO_BINARY('invalid', 'HEX') → TRY(UNHEX('invalid'))
            """
            value = expression.this
            format_arg = expression.args.get("format")
            is_safe = expression.args.get("safe")

            # Snowflake's default format when none is given is HEX
            fmt = "HEX"
            if format_arg:
                fmt = format_arg.name.upper()

            if expression.is_type(exp.DataType.Type.BINARY):
                if fmt == "UTF-8":
                    result = self.func("ENCODE", value)
                elif fmt == "BASE64":
                    result = self.func("FROM_BASE64", value)
                elif fmt == "HEX":
                    result = self.func("UNHEX", value)
                else:
                    if is_safe:
                        # TRY_TO_BINARY with an unknown format yields NULL rather than erroring
                        return self.sql(exp.null())
                    else:
                        self.unsupported(f"format {fmt} is not supported")
                        result = self.func("TO_BINARY", value)

                # Wrap with TRY() for TRY_TO_BINARY
                if is_safe:
                    result = self.func("TRY", result)

                return result

            # Fallback, which needs to be updated if want to support transpilation from other dialects than Snowflake
            return self.func("TO_BINARY", value)

        def _greatest_least_sql(
            self: DuckDB.Generator, expression: exp.Greatest | exp.Least
        ) -> str:
            """
            Handle GREATEST/LEAST functions with dialect-aware NULL behavior.

            - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
            - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
            """
            # Get all arguments
            all_args = [expression.this, *expression.expressions]
            fallback_sql = self.function_fallback_sql(expression)

            if expression.args.get("ignore_nulls"):
                # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
                return self.sql(fallback_sql)

            # return NULL if any argument is NULL
            case_expr = exp.case().when(
                exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
                exp.null(),
                copy=False,
            )
            case_expr.set("default", fallback_sql)
            return self.sql(case_expr)

        def greatest_sql(self: DuckDB.Generator, expression: exp.Greatest) -> str:
            """Delegate to the shared GREATEST/LEAST NULL-handling logic."""
            return self._greatest_least_sql(expression)

        def least_sql(self: DuckDB.Generator, expression: exp.Least) -> str:
            """Delegate to the shared GREATEST/LEAST NULL-handling logic."""
            return self._greatest_least_sql(expression)

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            """Render lambdas; the colon form is emitted as `LAMBDA x : ...` without wrapping."""
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render SHOW statements; only the bare `SHOW <name>` form is produced."""
            return f"SHOW {expression.name}"

        def install_sql(self, expression: exp.Install) -> str:
            """Render DuckDB's [FORCE] INSTALL <extension> [FROM <source>] statement."""
            force = "FORCE " if expression.args.get("force") else ""
            this = self.sql(expression, "this")
            from_clause = expression.args.get("from_")
            from_clause = f" FROM {from_clause}" if from_clause else ""
            return f"{force}INSTALL {this}{from_clause}"

        def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
            """Flag APPROX_TOP_K as untranspilable and fall back to a plain function render."""
            self.unsupported(
                "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
            )
            return self.function_fallback_sql(expression)

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            """FROM_ISO8601_TIMESTAMP(x) -> CAST(x AS TIMESTAMPTZ)."""
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            """Render STR_TO_TIME via STRPTIME/TRY_STRPTIME, casting to TIMESTAMPTZ when needed."""
            # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
            target_type = expression.args.get("target_type")
            needs_tz = target_type and target_type.this in (
                exp.DataType.Type.TIMESTAMPLTZ,
                exp.DataType.Type.TIMESTAMPTZ,
            )

            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                cast_type = (
                    exp.DataType.Type.TIMESTAMPTZ if needs_tz else exp.DataType.Type.TIMESTAMP
                )
                return self.sql(
                    exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
                )

            base_sql = str_to_time_sql(self, expression)
            if needs_tz:
                return self.sql(
                    exp.cast(
                        base_sql,
                        exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ),
                    )
                )
            return base_sql

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            """STR_TO_DATE -> CAST([TRY_]STRPTIME(value, fmt) AS DATE)."""
            formatted_time = self.format_time(expression)
            function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
            return self.sql(
                exp.cast(
                    self.func(function_name, expression.this, formatted_time),
                    exp.DataType(this=exp.DataType.Type.DATE),
                )
            )

        def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
            """Render TS_OR_DS_TO_TIME; uses [TRY_]STRPTIME for formatted input, CAST otherwise."""
            this = expression.this
            time_format = self.format_time(expression)
            safe = expression.args.get("safe")
            time_type = exp.DataType.build("TIME", dialect="duckdb")
            # safe=True maps to TRY_CAST / TRY_STRPTIME so invalid input yields NULL
            cast_expr = exp.TryCast if safe else exp.Cast

            if time_format:
                func_name = "TRY_STRPTIME" if safe else "STRPTIME"
                strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
                return self.sql(cast_expr(this=strptime, to=time_type))

            # Already a TIME (or nested TS_OR_DS_TO_TIME): no cast needed
            if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DataType.Type.TIME):
                return self.sql(this)

            return self.sql(cast_expr(this=this, to=time_type))

        def currentdate_sql(self, expression: exp.CurrentDate) -> str:
            """CURRENT_DATE, or CAST(CURRENT_TIMESTAMP AT TIME ZONE tz AS DATE) when a zone is given."""
            if not expression.this:
                return "CURRENT_DATE"

            expr = exp.Cast(
                this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
                to=exp.DataType(this=exp.DataType.Type.DATE),
            )
            return self.sql(expr)

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            """PARSE_JSON -> JSON(x); the safe variant guards with json_valid and yields NULL."""
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def normal_sql(self, expression: exp.Normal) -> str:
            """
            Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

            Uses the Box-Muller transform via NORMAL_TEMPLATE.
            """
            mean = expression.this
            stddev = expression.args["stddev"]
            gen: exp.Expression = expression.args["gen"]

            # Build two uniform random values [0, 1) for Box-Muller transform
            if isinstance(gen, exp.Rand) and gen.this is None:
                u1: exp.Expression = exp.Rand()
                u2: exp.Expression = exp.Rand()
            else:
                # Seeded: derive two values using HASH with different inputs
                seed = gen.this if isinstance(gen, exp.Rand) else gen
                u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
                u2 = exp.replace_placeholders(
                    self.SEEDED_RANDOM_TEMPLATE,
                    seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
                )

            replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
            return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))

        def uniform_sql(self, expression: exp.Uniform) -> str:
            """
            Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

            UNIFORM returns a random value in [min, max]:
            - Integer result if both min and max are integers
            - Float result if either min or max is a float
            """
            min_val = expression.this
            max_val = expression.expression
            gen = expression.args.get("gen")

            # Determine if result should be integer (both bounds are integers).
            # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
            is_int_result = min_val.is_int and max_val.is_int

            # Build the random value expression [0, 1)
            if not isinstance(gen, exp.Rand):
                # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
                random_expr: exp.Expression = exp.Div(
                    this=exp.Paren(
                        this=exp.Mod(
                            this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                            expression=exp.Literal.number(1000000),
                        )
                    ),
                    expression=exp.Literal.number(1000000.0),
                )
            else:
                random_expr = exp.Rand()

            # Build: min + random * (max - min [+ 1 for int])
            range_expr: exp.Expression = exp.Sub(this=max_val, expression=min_val)
            if is_int_result:
                range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

            result: exp.Expression = exp.Add(
                this=min_val,
                expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
            )

            if is_int_result:
                # FLOOR + CAST keeps the inclusive upper bound while producing an integer
                result = exp.Cast(
                    this=exp.Floor(this=result),
                    to=exp.DataType.build("BIGINT"),
                )

            return self.sql(result)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            """TIME_FROM_PARTS -> MAKE_TIME, folding nanoseconds into the seconds argument."""
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """TIMESTAMP_FROM_PARTS -> MAKE_TIMESTAMP, folding milli/nanoseconds into seconds."""
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            """Render TABLESAMPLE, coercing discrete-size samples to reservoir sampling."""
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            """UDF parameters are rendered as bare names (no type) in DuckDB."""
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            """Render joins, normalizing condition-less joins for DuckDB."""
            if (
                not expression.args.get("using")
                and not expression.args.get("on")
                and not expression.method
                and (expression.kind in ("", "INNER", "OUTER"))
            ):
                # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                if isinstance(expression.this, exp.Unnest):
                    return super().join_sql(expression.on(exp.true()))

                expression.set("side", None)
                expression.set("kind", None)

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            """COUNT_IF natively on DuckDB >= 1.2; SUM-based emulation for older versions."""
            if self.dialect.version >= (1, 2):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render bracket subscripts, adapting to pre-1.2 DuckDB MAP/array semantics."""
            if self.dialect.version >= (1, 2):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                # Parenthesize array literals so the subscript binds correctly
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    # Pre-1.2 map subscripts return a list; take its first element
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            """Render WITHIN GROUP, folding ORDER BY into the function where DuckDB requires it."""
            func = expression.this

            # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
            # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
            if isinstance(func, exp.ArrayAgg):
                if not isinstance(order := expression.expression, exp.Order):
                    return self.sql(func)

                # Save the original column for FILTER clause (before wrapping with Order)
                original_this = func.this

                # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
                # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
                func.set(
                    "this",
                    exp.Order(
                        this=func.this.copy(),
                        expressions=order.expressions,
                    ),
                )

                # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
                # Use original_this (not the Order-wrapped version) for the FILTER condition
                array_agg_sql = self.function_fallback_sql(func)
                return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

            # For other functions (like PERCENTILES), use existing logic
            expression_sql = self.sql(expression, "expression")

            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            # Strip the function's closing paren so the WITHIN GROUP body can be spliced in
            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            """Render LENGTH, resolving binary vs. text arguments for DuckDB's strict typing."""
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def lower_sql(self, expression: exp.Lower) -> str:
            """LOWER with VARCHAR coercion; restores BLOB output type where the input was binary."""
            result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
            return _gen_with_cast_to_blob(self, expression, result_sql)

        def upper_sql(self, expression: exp.Upper) -> str:
            """UPPER with VARCHAR coercion; restores BLOB output type where the input was binary."""
            result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
            return _gen_with_cast_to_blob(self, expression, result_sql)

        def replace_sql(self, expression: exp.Replace) -> str:
            """REPLACE with VARCHAR coercion on all three arguments; restores BLOB output type."""
            result_sql = self.func(
                "REPLACE",
                _cast_to_varchar(expression.this),
                _cast_to_varchar(expression.expression),
                _cast_to_varchar(expression.args.get("replacement")),
            )
            return _gen_with_cast_to_blob(self, expression, result_sql)

        def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
            """Shared renderer for binary bitwise operators with blob-aware argument handling."""
            _prepare_binary_bitwise_args(expression)
            result_sql = self.binary(expression, op)
            return _gen_with_cast_to_blob(self, expression, result_sql)

        def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
            """Bitwise XOR renders as DuckDB's XOR() function rather than an operator."""
            _prepare_binary_bitwise_args(expression)
            result_sql = self.func("XOR", expression.this, expression.expression)
            return _gen_with_cast_to_blob(self, expression, result_sql)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            """OBJECT_INSERT -> STRUCT_INSERT/STRUCT_PACK with `key := value` syntax."""
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def startswith_sql(self, expression: exp.StartsWith) -> str:
            """STARTS_WITH with VARCHAR coercion on both arguments."""
            return self.func(
                "STARTS_WITH",
                _cast_to_varchar(expression.this),
                _cast_to_varchar(expression.expression),
            )

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST, emulating BigQuery's array-exploding FROM UNNEST semantics."""
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if isinstance(alias, exp.TableAlias):
                    expression.set("alias", None)
                    if alias.columns:
                        alias = exp.TableAlias(this=seq_get(alias.columns, 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            """Render IGNORE NULLS only where DuckDB accepts it; rewrite or drop it otherwise."""
            this = expression.this

            if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            if isinstance(this, exp.First):
                # FIRST(... IGNORE NULLS) is expressible as ANY_VALUE, which skips NULLs
                this = exp.AnyValue(this=this.this)

            if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
                self.unsupported("IGNORE NULLS is not supported for non-window functions.")

            return self.sql(this)

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            """Render RESPECT NULLS only where DuckDB accepts it; warn and drop it otherwise."""
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            """ARRAY_TO_STRING; a NULL-replacement argument is emulated via LIST_TRANSFORM+COALESCE."""
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            """Render REGEXP_EXTRACT, emulating position/occurrence arguments DuckDB lacks."""
            this = expression.this
            group = expression.args.get("group")
            params = expression.args.get("parameters")
            position = expression.args.get("position")
            occurrence = expression.args.get("occurrence")
            null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

            # A start position > 1 is emulated by slicing the haystack first
            if position and (not position.is_int or position.to_py() > 1):
                this = exp.Substring(this=this, start=position)

                if null_if_pos_overflow:
                    # SUBSTRING past the end yields '', which NULLIF turns into NULL
                    this = exp.Nullif(this=this, expression=exp.Literal.string(""))

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None

            if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
                # N-th occurrence: extract all matches and index into the list
                # NOTE(review): `exp.Literal.number(occurrence)` relies on str(occurrence)
                # rendering correctly for non-numeric occurrence expressions — confirm
                return self.func(
                    "ARRAY_EXTRACT",
                    self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
                    exp.Literal.number(occurrence),
                )

            return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            """TO_CHAR-style number formatting via DuckDB's FORMAT with a thousands separator."""
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            """AUTOINCREMENT has no DuckDB equivalent; warn and emit nothing."""
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            """Route aliased POSEXPLODE through the dedicated renderer."""
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            """Transpile Spark's POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS."""
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql

        def addmonths_sql(self, expression: exp.AddMonths) -> str:
            """
            Handles three key issues:
            1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
            2. End-of-month preservation: If input is last day of month, result is last day of result month
            3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
            """
            from sqlglot.optimizer.annotate_types import annotate_types

            this = expression.this
            if not this.type:
                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

            # Detect float/decimal months to apply rounding (Snowflake behavior)
            # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
            months_expr = expression.expression
            if not months_expr.type:
                months_expr = annotate_types(months_expr, dialect=self.dialect)

            # Build interval or to_months expression based on type
            # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
            interval_or_to_months = (
                exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
                if months_expr.is_type(
                    exp.DataType.Type.FLOAT,
                    exp.DataType.Type.DOUBLE,
                    exp.DataType.Type.DECIMAL,
                )
                # Integer case: standard INTERVAL N MONTH syntax
                else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
            )

            date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

            # Apply end-of-month preservation if Snowflake flag is set
            # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
            preserve_eom = expression.args.get("preserve_end_of_month")
            result_expr = (
                exp.case()
                .when(
                    exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                    exp.func("LAST_DAY", date_add_expr),
                )
                .else_(date_add_expr)
                if preserve_eom
                else date_add_expr
            )

            # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
            # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
            # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
            # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
            if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
                return self.sql(exp.Cast(this=result_expr, to=this.type))
            return self.sql(result_expr)

        def format_sql(self, expression: exp.Format) -> str:
            """FORMAT('%s', x) maps to DuckDB's brace-style FORMAT('{}', x); else fall back."""
            if expression.name.lower() == "%s" and len(expression.expressions) == 1:
                return self.func("FORMAT", "'{}'", expression.expressions[0])

            return self.function_fallback_sql(expression)

        def hexstring_sql(
            self, expression: exp.HexString, binary_function_repr: t.Optional[str] = None
        ) -> str:
            # UNHEX('FF') correctly produces blob \xFF in DuckDB
            return super().hexstring_sql(expression, binary_function_repr="UNHEX")

        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
            unit = unit_to_str(expression)
            date = expression.this
            result = self.func("DATE_TRUNC", unit, date)

            if expression.args.get("input_type_preserved"):
                if not date.type:
2566 from sqlglot.optimizer.annotate_types import annotate_types 2567 2568 date = annotate_types(date, dialect=self.dialect) 2569 2570 if date.type and date.is_type(*exp.DataType.TEMPORAL_TYPES): 2571 return self.sql(exp.Cast(this=result, to=date.type)) 2572 return result 2573 2574 def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str: 2575 unit = unit_to_str(expression) 2576 zone = expression.args.get("zone") 2577 timestamp = expression.this 2578 2579 if is_date_unit(unit) and zone: 2580 # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC. 2581 # Double AT TIME ZONE needed for BigQuery compatibility: 2582 # 1. First AT TIME ZONE: ensures truncation happens in the target timezone 2583 # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component) 2584 timestamp = exp.AtTimeZone(this=timestamp, zone=zone) 2585 result_sql = self.func("DATE_TRUNC", unit, timestamp) 2586 return self.sql(exp.AtTimeZone(this=result_sql, zone=zone)) 2587 2588 result = self.func("DATE_TRUNC", unit, timestamp) 2589 if expression.args.get("input_type_preserved"): 2590 if not timestamp.type: 2591 from sqlglot.optimizer.annotate_types import annotate_types 2592 2593 timestamp = annotate_types(timestamp, dialect=self.dialect) 2594 2595 if timestamp.type and timestamp.is_type( 2596 exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ 2597 ): 2598 dummy_date = exp.Cast( 2599 this=exp.Literal.string("1970-01-01"), 2600 to=exp.DataType(this=exp.DataType.Type.DATE), 2601 ) 2602 date_time = exp.Add(this=dummy_date, expression=timestamp) 2603 result = self.func("DATE_TRUNC", unit, date_time) 2604 return self.sql(exp.Cast(this=result, to=timestamp.type)) 2605 2606 if timestamp.type and timestamp.is_type(*exp.DataType.TEMPORAL_TYPES): 2607 return self.sql(exp.Cast(this=result, to=timestamp.type)) 2608 return result 2609 2610 def trim_sql(self, expression: exp.Trim) -> str: 2611 
expression.this.replace(_cast_to_varchar(expression.this)) 2612 if expression.expression: 2613 expression.expression.replace(_cast_to_varchar(expression.expression)) 2614 2615 result_sql = super().trim_sql(expression) 2616 return _gen_with_cast_to_blob(self, expression, result_sql) 2617 2618 def round_sql(self, expression: exp.Round) -> str: 2619 this = expression.this 2620 decimals = expression.args.get("decimals") 2621 truncate = expression.args.get("truncate") 2622 2623 # DuckDB requires the scale (decimals) argument to be an INT 2624 # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally 2625 if decimals is not None and expression.args.get("casts_non_integer_decimals"): 2626 if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)): 2627 decimals = exp.cast(decimals, exp.DataType.Type.INT) 2628 2629 func = "ROUND" 2630 if truncate: 2631 # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN 2632 if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"): 2633 func = "ROUND_EVEN" 2634 truncate = None 2635 # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO 2636 elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"): 2637 truncate = None 2638 2639 return self.func(func, this, decimals, truncate) 2640 2641 def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str: 2642 """ 2643 BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values 2644 dividing the input distribution into n equal-sized buckets. 2645 2646 Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery 2647 does not document the specific algorithm used so results may differ. DuckDB does not 2648 support RESPECT NULLS. 
2649 """ 2650 this = expression.this 2651 if isinstance(this, exp.Distinct): 2652 # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both 2653 if len(this.expressions) < 2: 2654 self.unsupported("APPROX_QUANTILES requires a bucket count argument") 2655 return self.function_fallback_sql(expression) 2656 num_quantiles_expr = this.expressions[1].pop() 2657 else: 2658 num_quantiles_expr = expression.expression 2659 2660 if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int: 2661 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 2662 return self.function_fallback_sql(expression) 2663 2664 num_quantiles = t.cast(int, num_quantiles_expr.to_py()) 2665 if num_quantiles <= 0: 2666 self.unsupported("APPROX_QUANTILES bucket count must be a positive integer") 2667 return self.function_fallback_sql(expression) 2668 2669 quantiles = [ 2670 exp.Literal.number(Decimal(i) / Decimal(num_quantiles)) 2671 for i in range(num_quantiles + 1) 2672 ] 2673 2674 return self.sql( 2675 exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)) 2676 ) 2677 2678 def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str: 2679 if expression.args.get("scalar_only"): 2680 expression = exp.JSONExtractScalar( 2681 this=rename_func("JSON_VALUE")(self, expression), expression="'$'" 2682 ) 2683 return _arrow_json_extract_sql(self, expression) 2684 2685 def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str: 2686 this = expression.this 2687 2688 if _is_binary(this): 2689 expression.type = exp.DataType.build("BINARY") 2690 2691 arg = _cast_to_bit(this) 2692 2693 if isinstance(this, exp.Neg): 2694 arg = exp.Paren(this=arg) 2695 2696 expression.set("this", arg) 2697 2698 result_sql = f"~{self.sql(expression, 'this')}" 2699 2700 return _gen_with_cast_to_blob(self, expression, result_sql) 2701 2702 def window_sql(self, expression: exp.Window) -> str: 2703 this = expression.this 2704 if isinstance(this, exp.Corr) 
or ( 2705 isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr) 2706 ): 2707 return self._corr_sql(expression) 2708 2709 return super().window_sql(expression) 2710 2711 def filter_sql(self, expression: exp.Filter) -> str: 2712 if isinstance(expression.this, exp.Corr): 2713 return self._corr_sql(expression) 2714 2715 return super().filter_sql(expression) 2716 2717 def _corr_sql( 2718 self, 2719 expression: t.Union[exp.Filter, exp.Window, exp.Corr], 2720 ) -> str: 2721 if isinstance(expression, exp.Corr) and not expression.args.get( 2722 "null_on_zero_variance" 2723 ): 2724 return self.func("CORR", expression.this, expression.expression) 2725 2726 corr_expr = _maybe_corr_null_to_false(expression) 2727 if corr_expr is None: 2728 if isinstance(expression, exp.Window): 2729 return super().window_sql(expression) 2730 if isinstance(expression, exp.Filter): 2731 return super().filter_sql(expression) 2732 corr_expr = expression # make mypy happy 2733 2734 return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def bitmapbucketnumber_sql(
    self: DuckDB.Generator, expression: exp.BitmapBucketNumber
) -> str:
    """
    Transpile Snowflake's BITMAP_BUCKET_NUMBER to a DuckDB CASE expression.

    Snowflake buckets values 32,768 at a time, numbering buckets from 1:
      value > 0  ->  ((value - 1) // 32768) + 1
      value <= 0 ->  value // 32768  (avoids bucket 0 / positive IDs for negatives)
    """
    value = expression.this

    # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
    bucket_when_positive = ((value - 1) // 32768) + 1
    bucket_otherwise = value // 32768
    is_positive = exp.GT(this=value, expression=exp.Literal.number(0))

    return self.sql(
        exp.case().when(is_positive, bucket_when_positive).else_(bucket_otherwise)
    )
Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.
Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
- Each bucket covers 32,768 values
- Bucket numbering starts at 1
- Formula: ((value - 1) // 32768) + 1 for positive values
For non-positive values (0 and negative), we use value // 32768 to avoid producing bucket 0 or positive bucket IDs for negative inputs.
def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
    """
    Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB.

    Snowflake semantics:
      - n <= 0: ABS(n) % 32768
      - n > 0:  (n - 1) % 32768 (maximum return value is 32767)
    """
    this = expression.this

    # IF(n > 0, n - 1, ABS(n)), wrapped so % binds to the whole conditional
    adjusted = exp.If(
        this=exp.GT(this=this, expression=exp.Literal.number(0)),
        true=this - exp.Literal.number(1),
        false=exp.Abs(this=this),
    )
    return self.sql(exp.Mod(this=exp.Paren(this=adjusted), expression=MAX_BIT_POSITION))
Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
Snowflake's BITMAP_BIT_POSITION behavior:
- For n <= 0: returns ABS(n) % 32768
- For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
    """
    Transpile Snowflake's RANDSTR to DuckDB via a pre-parsed, hash-based template
    whose placeholders are replaced with expression nodes.

    RANDSTR(length, generator) generates a random string of `length` characters:
      - Numeric seed: HASH(i + seed) keeps output deterministic (same seed = same result)
      - RANDOM() generator: the hash input stays non-deterministic
      - No generator: a fixed default seed is used
    """
    length = expression.this
    generator = expression.args.get("generator")

    if not generator:
        # No generator specified: arbitrary but deterministic default seed
        seed_value = exp.Literal.number(RANDSTR_SEED)
    elif isinstance(generator, exp.Rand):
        # RANDOM([seed]): prefer its seed when present, else keep RANDOM() itself
        seed_value = generator.this or generator
    else:
        # Constant/int or any other expression acts as the seed directly
        seed_value = generator

    filled = exp.replace_placeholders(self.RANDSTR_TEMPLATE, seed=seed_value, length=length)
    return f"({self.sql(filled)})"
Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random. Uses a pre-parsed template with placeholders replaced by expression nodes.
RANDSTR(length, generator) generates a random string of specified length.
- With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
- With RANDOM(): Use RANDOM() in the hash for non-deterministic output
- No generator: Use default seed value
def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
    """
    Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling,
    driven by a pre-parsed template with placeholder substitution.
    """
    s = expression.this
    n = expression.args["elementcount"]
    gen = expression.args["gen"]

    if isinstance(gen, exp.Rand):
        # RANDOM(): keep the output non-deterministic
        random_expr: exp.Expression = exp.Rand()
    else:
        # Seeded: (ABS(HASH(seed)) % 1000000) / 1000000.0
        hashed = exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()]))
        random_expr = exp.Div(
            this=exp.Paren(
                this=exp.Mod(this=hashed, expression=exp.Literal.number(1000000))
            ),
            expression=exp.Literal.number(1000000.0),
        )

    filled = exp.replace_placeholders(self.ZIPF_TEMPLATE, s=s, n=n, random_expr=random_expr)
    return f"({self.sql(filled)})"
Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling. Uses a pre-parsed template with placeholders replaced by expression nodes.
def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
    """
    TO_BINARY and TRY_TO_BINARY transpilation:
    - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
    - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
    - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')

    For TRY_TO_BINARY (safe=True), wrap with TRY():
    - 'HEX': TRY_TO_BINARY('invalid', 'HEX') → TRY(UNHEX('invalid'))
    """
    value = expression.this
    format_arg = expression.args.get("format")
    is_safe = expression.args.get("safe")

    # Default format is HEX when no explicit format argument is given
    fmt = format_arg.name.upper() if format_arg else "HEX"

    if expression.is_type(exp.DataType.Type.BINARY):
        if fmt == "UTF-8":
            result = self.func("ENCODE", value)
        elif fmt == "BASE64":
            result = self.func("FROM_BASE64", value)
        elif fmt == "HEX":
            result = self.func("UNHEX", value)
        else:
            if is_safe:
                return self.sql(exp.null())
            self.unsupported(f"format {fmt} is not supported")
            result = self.func("TO_BINARY", value)

        # Wrap with TRY() for TRY_TO_BINARY
        if is_safe:
            result = self.func("TRY", result)

        return result

    # Fallback, which needs to be updated if want to support transpilation from other dialects than Snowflake
    return self.func("TO_BINARY", value)
TO_BINARY and TRY_TO_BINARY transpilation:
- 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
- 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
- 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')
For TRY_TO_BINARY (safe=True), wrap with TRY():
- 'HEX': TRY_TO_BINARY('invalid', 'HEX') → TRY(UNHEX('invalid'))
1941 def lambda_sql( 1942 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 1943 ) -> str: 1944 if expression.args.get("colon"): 1945 prefix = "LAMBDA " 1946 arrow_sep = ":" 1947 wrap = False 1948 else: 1949 prefix = "" 1950 1951 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 1952 return f"{prefix}{lambda_sql}"
1957 def install_sql(self, expression: exp.Install) -> str: 1958 force = "FORCE " if expression.args.get("force") else "" 1959 this = self.sql(expression, "this") 1960 from_clause = expression.args.get("from_") 1961 from_clause = f" FROM {from_clause}" if from_clause else "" 1962 return f"{force}INSTALL {this}{from_clause}"
1973 def strtotime_sql(self, expression: exp.StrToTime) -> str: 1974 # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants) 1975 target_type = expression.args.get("target_type") 1976 needs_tz = target_type and target_type.this in ( 1977 exp.DataType.Type.TIMESTAMPLTZ, 1978 exp.DataType.Type.TIMESTAMPTZ, 1979 ) 1980 1981 if expression.args.get("safe"): 1982 formatted_time = self.format_time(expression) 1983 cast_type = ( 1984 exp.DataType.Type.TIMESTAMPTZ if needs_tz else exp.DataType.Type.TIMESTAMP 1985 ) 1986 return self.sql( 1987 exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type) 1988 ) 1989 1990 base_sql = str_to_time_sql(self, expression) 1991 if needs_tz: 1992 return self.sql( 1993 exp.cast( 1994 base_sql, 1995 exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ), 1996 ) 1997 ) 1998 return base_sql
2000 def strtodate_sql(self, expression: exp.StrToDate) -> str: 2001 formatted_time = self.format_time(expression) 2002 function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME" 2003 return self.sql( 2004 exp.cast( 2005 self.func(function_name, expression.this, formatted_time), 2006 exp.DataType(this=exp.DataType.Type.DATE), 2007 ) 2008 )
2010 def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str: 2011 this = expression.this 2012 time_format = self.format_time(expression) 2013 safe = expression.args.get("safe") 2014 time_type = exp.DataType.build("TIME", dialect="duckdb") 2015 cast_expr = exp.TryCast if safe else exp.Cast 2016 2017 if time_format: 2018 func_name = "TRY_STRPTIME" if safe else "STRPTIME" 2019 strptime = exp.Anonymous(this=func_name, expressions=[this, time_format]) 2020 return self.sql(cast_expr(this=strptime, to=time_type)) 2021 2022 if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DataType.Type.TIME): 2023 return self.sql(this) 2024 2025 return self.sql(cast_expr(this=this, to=time_type))
2027 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2028 if not expression.this: 2029 return "CURRENT_DATE" 2030 2031 expr = exp.Cast( 2032 this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this), 2033 to=exp.DataType(this=exp.DataType.Type.DATE), 2034 ) 2035 return self.sql(expr)
2043 def normal_sql(self, expression: exp.Normal) -> str: 2044 """ 2045 Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB. 2046 2047 Uses the Box-Muller transform via NORMAL_TEMPLATE. 2048 """ 2049 mean = expression.this 2050 stddev = expression.args["stddev"] 2051 gen: exp.Expression = expression.args["gen"] 2052 2053 # Build two uniform random values [0, 1) for Box-Muller transform 2054 if isinstance(gen, exp.Rand) and gen.this is None: 2055 u1: exp.Expression = exp.Rand() 2056 u2: exp.Expression = exp.Rand() 2057 else: 2058 # Seeded: derive two values using HASH with different inputs 2059 seed = gen.this if isinstance(gen, exp.Rand) else gen 2060 u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed) 2061 u2 = exp.replace_placeholders( 2062 self.SEEDED_RANDOM_TEMPLATE, 2063 seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)), 2064 ) 2065 2066 replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2} 2067 return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.
Uses the Box-Muller transform via NORMAL_TEMPLATE.
2069 def uniform_sql(self, expression: exp.Uniform) -> str: 2070 """ 2071 Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB. 2072 2073 UNIFORM returns a random value in [min, max]: 2074 - Integer result if both min and max are integers 2075 - Float result if either min or max is a float 2076 """ 2077 min_val = expression.this 2078 max_val = expression.expression 2079 gen = expression.args.get("gen") 2080 2081 # Determine if result should be integer (both bounds are integers). 2082 # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT 2083 is_int_result = min_val.is_int and max_val.is_int 2084 2085 # Build the random value expression [0, 1) 2086 if not isinstance(gen, exp.Rand): 2087 # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0 2088 random_expr: exp.Expression = exp.Div( 2089 this=exp.Paren( 2090 this=exp.Mod( 2091 this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])), 2092 expression=exp.Literal.number(1000000), 2093 ) 2094 ), 2095 expression=exp.Literal.number(1000000.0), 2096 ) 2097 else: 2098 random_expr = exp.Rand() 2099 2100 # Build: min + random * (max - min [+ 1 for int]) 2101 range_expr: exp.Expression = exp.Sub(this=max_val, expression=min_val) 2102 if is_int_result: 2103 range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1)) 2104 2105 result: exp.Expression = exp.Add( 2106 this=min_val, 2107 expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)), 2108 ) 2109 2110 if is_int_result: 2111 result = exp.Cast( 2112 this=exp.Floor(this=result), 2113 to=exp.DataType.build("BIGINT"), 2114 ) 2115 2116 return self.sql(result)
Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.
UNIFORM returns a random value in [min, max]:
- Integer result if both min and max are integers
- Float result if either min or max is a float
2118 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 2119 nano = expression.args.get("nano") 2120 if nano is not None: 2121 expression.set( 2122 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 2123 ) 2124 2125 return rename_func("MAKE_TIME")(self, expression)
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        # MAKE_TIMESTAMP takes fractional seconds, so milli/nano components are
        # folded into the `sec` argument before renaming the function
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            # pop() detaches the node so it is not rendered as its own argument
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        # Only rewrite `sec` when at least one sub-second component was present
        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
2143 def tablesample_sql( 2144 self, 2145 expression: exp.TableSample, 2146 tablesample_keyword: t.Optional[str] = None, 2147 ) -> str: 2148 if not isinstance(expression.parent, exp.Select): 2149 # This sample clause only applies to a single source, not the entire resulting relation 2150 tablesample_keyword = "TABLESAMPLE" 2151 2152 if expression.args.get("size"): 2153 method = expression.args.get("method") 2154 if method and method.name.upper() != "RESERVOIR": 2155 self.unsupported( 2156 f"Sampling method {method} is not supported with a discrete sample count, " 2157 "defaulting to reservoir sampling" 2158 ) 2159 expression.set("method", exp.var("RESERVOIR")) 2160 2161 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
2168 def join_sql(self, expression: exp.Join) -> str: 2169 if ( 2170 not expression.args.get("using") 2171 and not expression.args.get("on") 2172 and not expression.method 2173 and (expression.kind in ("", "INNER", "OUTER")) 2174 ): 2175 # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause 2176 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 2177 if isinstance(expression.this, exp.Unnest): 2178 return super().join_sql(expression.on(exp.true())) 2179 2180 expression.set("side", None) 2181 expression.set("kind", None) 2182 2183 return super().join_sql(expression)
    def bracket_sql(self, expression: exp.Bracket) -> str:
        # DuckDB >= 1.2 changed subscript semantics, so no rewriting is needed there
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            # Parenthesize array literals so the subscript binds to the whole array
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DataType.Type.MAP):
                # Pre-1.2, a MAP subscript returns a single-element list; unwrap it
                bracket = f"({bracket})[1]"

        return bracket
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            # No ORDER BY present: WITHIN GROUP is a no-op, render the bare function
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the closing paren so the WITHIN GROUP body is spliced inside the call
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        # Type unknown at transpile time: decide between OCTET_LENGTH (blob)
        # and LENGTH (text) at runtime via TYPEOF
        case = (
            exp.case(self.func("TYPEOF", arg))
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            .else_(
                exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
        )

        return self.sql(case)
2301 def replace_sql(self, expression: exp.Replace) -> str: 2302 result_sql = self.func( 2303 "REPLACE", 2304 _cast_to_varchar(expression.this), 2305 _cast_to_varchar(expression.expression), 2306 _cast_to_varchar(expression.args.get("replacement")), 2307 ) 2308 return _gen_with_cast_to_blob(self, expression, result_sql)
2320 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 2321 this = expression.this 2322 key = expression.args.get("key") 2323 key_sql = key.name if isinstance(key, exp.Expression) else "" 2324 value_sql = self.sql(expression, "value") 2325 2326 kv_sql = f"{key_sql} := {value_sql}" 2327 2328 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 2329 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 2330 if isinstance(this, exp.Struct) and not this.expressions: 2331 return self.func("STRUCT_PACK", kv_sql) 2332 2333 return self.func("STRUCT_INSERT", this, kv_sql)
    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    # Only the first column alias carries over to the table alias
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            # Wrap the UNNEST in a subquery so the max_depth form is selectable
            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
2364 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 2365 this = expression.this 2366 2367 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 2368 # DuckDB should render IGNORE NULLS only for the general-purpose 2369 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 2370 return super().ignorenulls_sql(expression) 2371 2372 if isinstance(this, exp.First): 2373 this = exp.AnyValue(this=this.this) 2374 2375 if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)): 2376 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 2377 2378 return self.sql(this)
2380 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 2381 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 2382 # DuckDB should render RESPECT NULLS only for the general-purpose 2383 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 2384 return super().respectnulls_sql(expression) 2385 2386 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 2387 return self.sql(expression, "this")
2389 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 2390 this = self.sql(expression, "this") 2391 null_text = self.sql(expression, "null") 2392 2393 if null_text: 2394 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 2395 2396 return self.func("ARRAY_TO_STRING", this, expression.expression)
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        """Generate REGEXP_EXTRACT, emulating `position` and `occurrence` arguments.

        DuckDB's REGEXP_EXTRACT has no start-position or occurrence parameters
        (unlike e.g. Snowflake), so they are emulated with SUBSTRING and with
        REGEXP_EXTRACT_ALL + ARRAY_EXTRACT respectively.
        """
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # A start position past 1 (or a non-literal one) is emulated by slicing the input first
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            # If the position overflows the string, SUBSTRING yields '' — map that to NULL
            if null_if_pos_overflow:
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        # Do not render group if there is no following argument,
        # and it's the default value for this dialect
        if (
            not params
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        # An occurrence past the first (or a non-literal one) requires collecting
        # all matches and indexing into the resulting list
        if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
            return self.func(
                "ARRAY_EXTRACT",
                self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
                exp.Literal.number(occurrence),
            )

        return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)
2430 @unsupported_args("culture") 2431 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 2432 fmt = expression.args.get("format") 2433 if fmt and fmt.is_int: 2434 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 2435 2436 self.unsupported("Only integer formats are supported by NumberToStr") 2437 return self.function_fallback_sql(expression)
    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        """Transpile Spark's POSEXPLODE to DuckDB via GENERATE_SUBSCRIPTS + UNNEST.

        Picks up user-provided aliases from either the column form
        (POSEXPLODE(col) AS (a, b)) or the table form (FROM POSEXPLODE(col) AS t(a, b)),
        defaulting to Spark's implicit "pos" and "col" names.
        """
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                # The alias is consumed here, so detach it from the table node
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql
    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        if not this.type:
            # Type information drives every branch below, so annotate lazily if missing
            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            # String dates must be cast before interval arithmetic works
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DataType.Type.FLOAT,
                exp.DataType.Type.DOUBLE,
                exp.DataType.Type.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))

        return self.sql(result_expr)
Handles three key issues:
- Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
- End-of-month preservation: If input is last day of month, result is last day of result month
- Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
2559 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 2560 unit = unit_to_str(expression) 2561 date = expression.this 2562 result = self.func("DATE_TRUNC", unit, date) 2563 2564 if expression.args.get("input_type_preserved"): 2565 if not date.type: 2566 from sqlglot.optimizer.annotate_types import annotate_types 2567 2568 date = annotate_types(date, dialect=self.dialect) 2569 2570 if date.type and date.is_type(*exp.DataType.TEMPORAL_TYPES): 2571 return self.sql(exp.Cast(this=result, to=date.type)) 2572 return result
    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        """Generate DATE_TRUNC for TimestampTrunc, handling timezones and type preservation.

        Covers BigQuery's timezone-aware truncation (via double AT TIME ZONE) and,
        when the source dialect preserves input types, re-casts the result —
        including a dummy-date detour for pure TIME/TIMETZ operands, which
        DATE_TRUNC cannot truncate directly.
        """
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this

        if is_date_unit(unit) and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            if not timestamp.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                timestamp = annotate_types(timestamp, dialect=self.dialect)

            if timestamp.type and timestamp.is_type(
                exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ
            ):
                # TIME values are attached to a fixed dummy date so DATE_TRUNC can
                # operate, then the result is cast back to the original TIME type
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DataType.Type.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            if timestamp.type and timestamp.is_type(*exp.DataType.TEMPORAL_TYPES):
                return self.sql(exp.Cast(this=result, to=timestamp.type))

        return result
2610 def trim_sql(self, expression: exp.Trim) -> str: 2611 expression.this.replace(_cast_to_varchar(expression.this)) 2612 if expression.expression: 2613 expression.expression.replace(_cast_to_varchar(expression.expression)) 2614 2615 result_sql = super().trim_sql(expression) 2616 return _gen_with_cast_to_blob(self, expression, result_sql)
2618 def round_sql(self, expression: exp.Round) -> str: 2619 this = expression.this 2620 decimals = expression.args.get("decimals") 2621 truncate = expression.args.get("truncate") 2622 2623 # DuckDB requires the scale (decimals) argument to be an INT 2624 # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally 2625 if decimals is not None and expression.args.get("casts_non_integer_decimals"): 2626 if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)): 2627 decimals = exp.cast(decimals, exp.DataType.Type.INT) 2628 2629 func = "ROUND" 2630 if truncate: 2631 # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN 2632 if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"): 2633 func = "ROUND_EVEN" 2634 truncate = None 2635 # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO 2636 elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"): 2637 truncate = None 2638 2639 return self.func(func, this, decimals, truncate)
    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
        """
        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
        dividing the input distribution into n equal-sized buckets.

        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
        does not document the specific algorithm used so results may differ. DuckDB does not
        support RESPECT NULLS.
        """
        this = expression.this
        if isinstance(this, exp.Distinct):
            # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
            if len(this.expressions) < 2:
                self.unsupported("APPROX_QUANTILES requires a bucket count argument")
                return self.function_fallback_sql(expression)
            # Detach the bucket count from the DISTINCT node before rendering
            num_quantiles_expr = this.expressions[1].pop()
        else:
            num_quantiles_expr = expression.expression

        # Only literal, positive integer bucket counts can be expanded statically
        if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        num_quantiles = t.cast(int, num_quantiles_expr.to_py())
        if num_quantiles <= 0:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        # Decimal keeps quantile fractions exact (e.g. 1/3 is not a float artifact)
        quantiles = [
            exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
            for i in range(num_quantiles + 1)
        ]

        return self.sql(
            exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))
        )
BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values dividing the input distribution into n equal-sized buckets.
Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery does not document the specific algorithm used so results may differ. DuckDB does not support RESPECT NULLS.
2685 def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str: 2686 this = expression.this 2687 2688 if _is_binary(this): 2689 expression.type = exp.DataType.build("BINARY") 2690 2691 arg = _cast_to_bit(this) 2692 2693 if isinstance(this, exp.Neg): 2694 arg = exp.Paren(this=arg) 2695 2696 expression.set("this", arg) 2697 2698 result_sql = f"~{self.sql(expression, 'this')}" 2699 2700 return _gen_with_cast_to_blob(self, expression, result_sql)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- MATCH_AGAINST_TABLE_PREFIX
- UPDATE_STATEMENT_SUPPORTS_FROM
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwiseor_sql
- bitwiserightshift_sql
- cast_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql