sqlglot.dialects.duckdb
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.expressions import DATA_TYPE 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 JSON_EXTRACT_TYPE, 10 NormalizationStrategy, 11 Version, 12 approx_count_distinct_sql, 13 arrow_json_extract_sql, 14 binary_from_function, 15 bool_xor_sql, 16 build_default_decimal_type, 17 count_if_to_sum, 18 date_trunc_to_time, 19 datestrtodate_sql, 20 no_datetime_sql, 21 encode_decode_sql, 22 build_formatted_time, 23 inline_array_unless_query, 24 no_comment_column_constraint_sql, 25 no_time_sql, 26 no_timestamp_sql, 27 pivot_column_names, 28 rename_func, 29 remove_from_array_using_filter, 30 strposition_sql, 31 str_to_time_sql, 32 timestamptrunc_sql, 33 timestrtotime_sql, 34 unit_to_var, 35 unit_to_str, 36 sha256_sql, 37 build_regexp_extract, 38 explode_to_unnest_sql, 39 no_make_interval_sql, 40 groupconcat_sql, 41) 42from sqlglot.generator import unsupported_args 43from sqlglot.helper import seq_get 44from sqlglot.tokens import TokenType 45from sqlglot.parser import binary_range_parser 46 47DATETIME_DELTA = t.Union[ 48 exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub 49] 50 51 52def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str: 53 this = expression.this 54 unit = unit_to_var(expression) 55 op = ( 56 "+" 57 if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd)) 58 else "-" 59 ) 60 61 to_type: t.Optional[DATA_TYPE] = None 62 if isinstance(expression, exp.TsOrDsAdd): 63 to_type = expression.return_type 64 elif this.is_string: 65 # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work 66 to_type = ( 67 exp.DataType.Type.DATETIME 68 if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub)) 69 else exp.DataType.Type.DATE 70 ) 71 72 this = exp.cast(this, to_type) if to_type else this 73 74 expr = expression.expression 75 interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit) 76 77 return f"{self.sql(this)} {op} {self.sql(interval)}" 78 79 80# BigQuery -> DuckDB conversion for the DATE function 81def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str: 82 result = f"CAST({self.sql(expression, 'this')} AS DATE)" 83 zone = self.sql(expression, "zone") 84 85 if zone: 86 date_str = self.func("STRFTIME", result, "'%d/%m/%Y'") 87 date_str = f"{date_str} || ' ' || {zone}" 88 89 # This will create a TIMESTAMP with time zone information 90 result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'") 91 92 return result 93 94 95# BigQuery -> DuckDB conversion for the TIME_DIFF function 96def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: 97 this = exp.cast(expression.this, exp.DataType.Type.TIME) 98 expr = exp.cast(expression.expression, exp.DataType.Type.TIME) 99 100 # Although the 2 dialects share similar signatures, BQ seems to inverse 101 # the sign of the result so the start/end time operands are flipped 102 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 103 104 105@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator.")) 106def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str: 107 return self.func("ARRAY_SORT", expression.this) 108 109 110def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str: 111 name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT" 112 return self.func(name, expression.this) 113 114 115def _build_sort_array_desc(args: t.List) -> exp.Expression: 116 return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) 117 118 119def _build_date_diff(args: t.List) -> exp.Expression: 120 return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 121 122 123def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]: 124 def _builder(args: t.List) -> exp.GenerateSeries: 125 # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions 126 if len(args) == 1: 127 # DuckDB uses 0 as a default for the series' start when it's omitted 128 args.insert(0, exp.Literal.number("0")) 129 130 gen_series = exp.GenerateSeries.from_arg_list(args) 131 gen_series.set("is_end_exclusive", end_exclusive) 132 133 return gen_series 134 135 return _builder 136 137 138def _build_make_timestamp(args: t.List) -> exp.Expression: 139 if len(args) == 1: 140 return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS) 141 142 return exp.TimestampFromParts( 143 year=seq_get(args, 0), 144 month=seq_get(args, 1), 145 day=seq_get(args, 2), 146 hour=seq_get(args, 3), 147 min=seq_get(args, 4), 148 sec=seq_get(args, 5), 149 ) 150 151 152def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]: 153 def _parse(self: DuckDB.Parser) -> exp.Show: 154 return self._parse_show_duckdb(*args, **kwargs) 155 156 return _parse 157 158 159def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: 160 args: t.List[str] = [] 161 162 # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is 163 # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB 164 # The transformation to ROW will take place if: 165 # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would 166 # 2. A cast to STRUCT / ARRAY of STRUCTs is found 167 ancestor_cast = expression.find_ancestor(exp.Cast) 168 is_bq_inline_struct = ( 169 (expression.find(exp.PropertyEQ) is None) 170 and ancestor_cast 171 and any( 172 casted_type.is_type(exp.DataType.Type.STRUCT) 173 for casted_type in ancestor_cast.find_all(exp.DataType) 174 ) 175 ) 176 177 for i, expr in enumerate(expression.expressions): 178 is_property_eq = isinstance(expr, exp.PropertyEQ) 179 value = expr.expression if is_property_eq else expr 180 181 if is_bq_inline_struct: 182 args.append(self.sql(value)) 183 else: 184 key = expr.name if is_property_eq else f"_{i}" 185 args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}") 186 187 csv_args = ", ".join(args) 188 189 return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}" 190 191 192def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str: 193 if expression.is_type("array"): 194 return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]" 195 196 # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE 197 if expression.is_type( 198 exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ 199 ): 200 return expression.this.value 201 202 return self.datatype_sql(expression) 203 204 205def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str: 206 sql = self.func("TO_JSON", expression.this, expression.args.get("options")) 207 return f"CAST({sql} AS TEXT)" 208 209 210def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str: 211 scale = expression.args.get("scale") 212 timestamp = expression.this 213 214 if scale in (None, exp.UnixToTime.SECONDS): 215 return self.func("TO_TIMESTAMP", timestamp) 216 if scale == exp.UnixToTime.MILLIS: 217 return self.func("EPOCH_MS", timestamp) 218 if scale == exp.UnixToTime.MICROS: 219 return self.func("MAKE_TIMESTAMP", timestamp) 220 221 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 222 223 224WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 225 226 227def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 228 arrow_sql = arrow_json_extract_sql(self, expression) 229 if not expression.same_parent and isinstance( 230 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 231 ): 232 arrow_sql = self.wrap(arrow_sql) 233 return arrow_sql 234 235 236def _implicit_datetime_cast( 237 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 238) -> t.Optional[exp.Expression]: 239 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 240 241 242def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 243 this = _implicit_datetime_cast(expression.this) 244 expr = _implicit_datetime_cast(expression.expression) 245 246 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 247 248 249def _generate_datetime_array_sql( 250 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 251) -> str: 252 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 253 254 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 255 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 256 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 257 258 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 259 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 260 start=start, end=end, step=expression.args.get("step") 261 ) 262 263 if is_generate_date_array: 264 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 265 # GENERATE_DATE_ARRAY we must cast it back to DATE array 266 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 267 268 return self.sql(gen_series) 269 270 271def _json_extract_value_array_sql( 272 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 273) -> str: 274 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 275 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 276 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 277 278 279class DuckDB(Dialect): 280 NULL_ORDERING = "nulls_are_last" 281 SUPPORTS_USER_DEFINED_TYPES = True 282 SAFE_DIVISION = True 283 INDEX_OFFSET = 1 284 CONCAT_COALESCE = True 285 SUPPORTS_ORDER_BY_ALL = True 286 SUPPORTS_FIXED_SIZE_ARRAYS = True 287 STRICT_JSON_PATH_SYNTAX = False 288 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 289 290 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 291 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 292 293 DATE_PART_MAPPING = { 294 **Dialect.DATE_PART_MAPPING, 295 "DAYOFWEEKISO": "ISODOW", 296 } 297 DATE_PART_MAPPING.pop("WEEKDAY") 298 299 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 300 if isinstance(path, exp.Literal): 301 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 302 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 303 # This check ensures we'll avoid trying to parse these as JSON paths, which can 304 # either result in a noisy warning or in an invalid representation of the path. 305 path_text = path.name 306 if path_text.startswith("/") or "[#" in path_text: 307 return path 308 309 return super().to_json_path(path) 310 311 class Tokenizer(tokens.Tokenizer): 312 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 313 HEREDOC_STRINGS = ["$"] 314 315 HEREDOC_TAG_IS_IDENTIFIER = True 316 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 317 318 KEYWORDS = { 319 **tokens.Tokenizer.KEYWORDS, 320 "//": TokenType.DIV, 321 "**": TokenType.DSTAR, 322 "^@": TokenType.CARET_AT, 323 "@>": TokenType.AT_GT, 324 "<@": TokenType.LT_AT, 325 "ATTACH": TokenType.ATTACH, 326 "BINARY": TokenType.VARBINARY, 327 "BITSTRING": TokenType.BIT, 328 "BPCHAR": TokenType.TEXT, 329 "CHAR": TokenType.TEXT, 330 "DATETIME": TokenType.TIMESTAMPNTZ, 331 "DETACH": TokenType.DETACH, 332 "EXCLUDE": TokenType.EXCEPT, 333 "LOGICAL": TokenType.BOOLEAN, 334 "ONLY": TokenType.ONLY, 335 "PIVOT_WIDER": TokenType.PIVOT, 336 "POSITIONAL": TokenType.POSITIONAL, 337 "SIGNED": TokenType.INT, 338 "STRING": TokenType.TEXT, 339 "SUMMARIZE": TokenType.SUMMARIZE, 340 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 341 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 342 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 343 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 344 "TIMESTAMP_US": TokenType.TIMESTAMP, 345 "UBIGINT": TokenType.UBIGINT, 346 "UINTEGER": TokenType.UINT, 347 "USMALLINT": TokenType.USMALLINT, 348 "UTINYINT": TokenType.UTINYINT, 349 "VARCHAR": TokenType.TEXT, 350 } 351 KEYWORDS.pop("/*+") 352 353 SINGLE_TOKENS = { 354 **tokens.Tokenizer.SINGLE_TOKENS, 355 "$": TokenType.PARAMETER, 356 } 357 358 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 359 360 class Parser(parser.Parser): 361 BITWISE = { 362 **parser.Parser.BITWISE, 363 TokenType.TILDA: exp.RegexpLike, 364 } 365 BITWISE.pop(TokenType.CARET) 366 367 RANGE_PARSERS = { 368 **parser.Parser.RANGE_PARSERS, 369 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 370 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 371 } 372 373 EXPONENT = { 374 **parser.Parser.EXPONENT, 375 TokenType.CARET: exp.Pow, 376 TokenType.DSTAR: exp.Pow, 377 } 378 379 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 380 381 SHOW_PARSERS = { 382 "TABLES": _show_parser("TABLES"), 383 "ALL TABLES": _show_parser("ALL TABLES"), 384 } 385 386 FUNCTIONS = { 387 **parser.Parser.FUNCTIONS, 388 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 389 "ARRAY_SORT": exp.SortArray.from_arg_list, 390 "DATEDIFF": _build_date_diff, 391 "DATE_DIFF": _build_date_diff, 392 "DATE_TRUNC": date_trunc_to_time, 393 "DATETRUNC": date_trunc_to_time, 394 "DECODE": lambda args: exp.Decode( 395 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 396 ), 397 "EDITDIST3": exp.Levenshtein.from_arg_list, 398 "ENCODE": lambda args: exp.Encode( 399 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 400 ), 401 "EPOCH": exp.TimeToUnix.from_arg_list, 402 "EPOCH_MS": lambda args: exp.UnixToTime( 403 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 404 ), 405 "GENERATE_SERIES": _build_generate_series(), 406 "JSON": exp.ParseJSON.from_arg_list, 407 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 408 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 409 "LIST_HAS": exp.ArrayContains.from_arg_list, 410 "LIST_REVERSE_SORT": _build_sort_array_desc, 411 "LIST_SORT": exp.SortArray.from_arg_list, 412 "LIST_VALUE": lambda args: exp.Array(expressions=args), 413 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 414 "MAKE_TIMESTAMP": _build_make_timestamp, 415 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 416 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 417 "RANGE": _build_generate_series(end_exclusive=True), 418 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 419 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 420 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 421 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 422 this=seq_get(args, 0), 423 expression=seq_get(args, 1), 424 replacement=seq_get(args, 2), 425 modifiers=seq_get(args, 3), 426 ), 427 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 428 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 429 "STRING_SPLIT": exp.Split.from_arg_list, 430 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 431 "STRING_TO_ARRAY": exp.Split.from_arg_list, 432 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 433 "STRUCT_PACK": exp.Struct.from_arg_list, 434 "STR_SPLIT": exp.Split.from_arg_list, 435 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 436 "TIME_BUCKET": exp.DateBin.from_arg_list, 437 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 438 "UNNEST": exp.Explode.from_arg_list, 439 "XOR": binary_from_function(exp.BitwiseXor), 440 } 441 442 FUNCTIONS.pop("DATE_SUB") 443 FUNCTIONS.pop("GLOB") 444 445 FUNCTION_PARSERS = { 446 **parser.Parser.FUNCTION_PARSERS, 447 **dict.fromkeys( 448 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 449 ), 450 } 451 FUNCTION_PARSERS.pop("DECODE") 452 453 NO_PAREN_FUNCTION_PARSERS = { 454 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 455 "MAP": lambda self: self._parse_map(), 456 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 457 } 458 459 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 460 TokenType.SEMI, 461 TokenType.ANTI, 462 } 463 464 PLACEHOLDER_PARSERS = { 465 **parser.Parser.PLACEHOLDER_PARSERS, 466 TokenType.PARAMETER: lambda self: ( 467 self.expression(exp.Placeholder, this=self._prev.text) 468 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 469 else None 470 ), 471 } 472 473 TYPE_CONVERTERS = { 474 # https://duckdb.org/docs/sql/data_types/numeric 475 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 476 # https://duckdb.org/docs/sql/data_types/text 477 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 478 } 479 480 STATEMENT_PARSERS = { 481 **parser.Parser.STATEMENT_PARSERS, 482 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 483 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 484 TokenType.SHOW: lambda self: self._parse_show(), 485 } 486 487 def _parse_expression(self) -> t.Optional[exp.Expression]: 488 # DuckDB supports prefix aliases, e.g. foo: 1 489 if self._next and self._next.token_type == TokenType.COLON: 490 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 491 self._match(TokenType.COLON) 492 comments = self._prev_comments or [] 493 494 this = self._parse_assignment() 495 if isinstance(this, exp.Expression): 496 # Moves the comment next to the alias in `alias: expr /* comment */` 497 comments += this.pop_comments() or [] 498 499 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 500 501 return super()._parse_expression() 502 503 def _parse_table( 504 self, 505 schema: bool = False, 506 joins: bool = False, 507 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 508 parse_bracket: bool = False, 509 is_db_reference: bool = False, 510 parse_partition: bool = False, 511 ) -> t.Optional[exp.Expression]: 512 # DuckDB supports prefix aliases, e.g. FROM foo: bar 513 if self._next and self._next.token_type == TokenType.COLON: 514 alias = self._parse_table_alias( 515 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 516 ) 517 self._match(TokenType.COLON) 518 comments = self._prev_comments or [] 519 else: 520 alias = None 521 comments = [] 522 523 table = super()._parse_table( 524 schema=schema, 525 joins=joins, 526 alias_tokens=alias_tokens, 527 parse_bracket=parse_bracket, 528 is_db_reference=is_db_reference, 529 parse_partition=parse_partition, 530 ) 531 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 532 # Moves the comment next to the alias in `alias: table /* comment */` 533 comments += table.pop_comments() or [] 534 alias.comments = alias.pop_comments() + comments 535 table.set("alias", alias) 536 537 return table 538 539 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 540 # https://duckdb.org/docs/sql/samples.html 541 sample = super()._parse_table_sample(as_modifier=as_modifier) 542 if sample and not sample.args.get("method"): 543 if sample.args.get("size"): 544 sample.set("method", exp.var("RESERVOIR")) 545 else: 546 sample.set("method", exp.var("SYSTEM")) 547 548 return sample 549 550 def _parse_bracket( 551 self, this: t.Optional[exp.Expression] = None 552 ) -> t.Optional[exp.Expression]: 553 bracket = super()._parse_bracket(this) 554 555 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 556 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 557 bracket.set("returns_list_for_maps", True) 558 559 return bracket 560 561 def _parse_map(self) -> exp.ToMap | exp.Map: 562 if self._match(TokenType.L_BRACE, advance=False): 563 return self.expression(exp.ToMap, this=self._parse_bracket()) 564 565 args = self._parse_wrapped_csv(self._parse_assignment) 566 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 567 568 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 569 return self._parse_field_def() 570 571 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 572 if len(aggregations) == 1: 573 return super()._pivot_column_names(aggregations) 574 return pivot_column_names(aggregations, dialect="duckdb") 575 576 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 577 def _parse_attach_option() -> exp.AttachOption: 578 return self.expression( 579 exp.AttachOption, 580 this=self._parse_var(any_token=True), 581 expression=self._parse_field(any_token=True), 582 ) 583 584 self._match(TokenType.DATABASE) 585 exists = self._parse_exists(not_=is_attach) 586 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 587 588 if self._match(TokenType.L_PAREN, advance=False): 589 expressions = self._parse_wrapped_csv(_parse_attach_option) 590 else: 591 expressions = None 592 593 return ( 594 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 595 if is_attach 596 else self.expression(exp.Detach, this=this, exists=exists) 597 ) 598 599 def _parse_show_duckdb(self, this: str) -> exp.Show: 600 return self.expression(exp.Show, this=this) 601 602 class Generator(generator.Generator): 603 PARAMETER_TOKEN = "$" 604 NAMED_PLACEHOLDER_TOKEN = "$" 605 JOIN_HINTS = False 606 TABLE_HINTS = False 607 QUERY_HINTS = False 608 LIMIT_FETCH = "LIMIT" 609 STRUCT_DELIMITER = ("(", ")") 610 RENAME_TABLE_WITH_DB = False 611 NVL2_SUPPORTED = False 612 SEMI_ANTI_JOIN_WITH_SIDE = False 613 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 614 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 615 LAST_DAY_SUPPORTS_DATE_PART = False 616 JSON_KEY_VALUE_PAIR_SEP = "," 617 IGNORE_NULLS_IN_FUNC = True 618 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 619 SUPPORTS_CREATE_TABLE_LIKE = False 620 MULTI_ARG_DISTINCT = False 621 CAN_IMPLEMENT_ARRAY_ANY = True 622 SUPPORTS_TO_NUMBER = False 623 SUPPORTS_WINDOW_EXCLUDE = True 624 COPY_HAS_INTO_KEYWORD = False 625 STAR_EXCEPT = "EXCLUDE" 626 PAD_FILL_PATTERN_IS_REQUIRED = True 627 ARRAY_CONCAT_IS_VAR_LEN = False 628 ARRAY_SIZE_DIM_REQUIRED = False 629 NORMALIZE_EXTRACT_DATE_PARTS = True 630 631 TRANSFORMS = { 632 **generator.Generator.TRANSFORMS, 633 exp.ApproxDistinct: approx_count_distinct_sql, 634 exp.Array: inline_array_unless_query, 635 exp.ArrayFilter: rename_func("LIST_FILTER"), 636 exp.ArrayRemove: remove_from_array_using_filter, 637 exp.ArraySort: _array_sort_sql, 638 exp.ArraySum: rename_func("LIST_SUM"), 639 exp.BitwiseXor: rename_func("XOR"), 640 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 641 exp.CurrentDate: lambda *_: "CURRENT_DATE", 642 exp.CurrentTime: lambda *_: "CURRENT_TIME", 643 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 644 exp.DayOfMonth: rename_func("DAYOFMONTH"), 645 exp.DayOfWeek: rename_func("DAYOFWEEK"), 646 exp.DayOfWeekIso: rename_func("ISODOW"), 647 exp.DayOfYear: rename_func("DAYOFYEAR"), 648 exp.DataType: _datatype_sql, 649 exp.Date: _date_sql, 650 exp.DateAdd: _date_delta_sql, 651 exp.DateFromParts: rename_func("MAKE_DATE"), 652 exp.DateSub: _date_delta_sql, 653 exp.DateDiff: _date_diff_sql, 654 exp.DateStrToDate: datestrtodate_sql, 655 exp.Datetime: no_datetime_sql, 656 exp.DatetimeSub: _date_delta_sql, 657 exp.DatetimeAdd: _date_delta_sql, 658 exp.DateToDi: lambda self, 659 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 660 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 661 exp.DiToDate: lambda self, 662 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 663 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 664 exp.GenerateDateArray: _generate_datetime_array_sql, 665 exp.GenerateTimestampArray: _generate_datetime_array_sql, 666 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 667 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 668 exp.Explode: rename_func("UNNEST"), 669 exp.IntDiv: lambda self, e: self.binary(e, "//"), 670 exp.IsInf: rename_func("ISINF"), 671 exp.IsNan: rename_func("ISNAN"), 672 exp.JSONBExists: rename_func("JSON_EXISTS"), 673 exp.JSONExtract: _arrow_json_extract_sql, 674 exp.JSONExtractArray: _json_extract_value_array_sql, 675 exp.JSONExtractScalar: _arrow_json_extract_sql, 676 exp.JSONFormat: _json_format_sql, 677 exp.JSONValueArray: _json_extract_value_array_sql, 678 exp.Lateral: explode_to_unnest_sql, 679 exp.LogicalOr: rename_func("BOOL_OR"), 680 exp.LogicalAnd: rename_func("BOOL_AND"), 681 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 682 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 683 exp.MonthsBetween: lambda self, e: self.func( 684 "DATEDIFF", 685 "'month'", 686 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 687 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 688 ), 689 exp.PercentileCont: rename_func("QUANTILE_CONT"), 690 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 691 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 692 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 693 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 694 exp.RegexpReplace: lambda self, e: self.func( 695 "REGEXP_REPLACE", 696 e.this, 697 e.expression, 698 e.args.get("replacement"), 699 e.args.get("modifiers"), 700 ), 701 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 702 exp.RegexpILike: lambda self, e: self.func( 703 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 704 ), 705 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 706 exp.Return: lambda self, e: self.sql(e, "this"), 707 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 708 exp.Rand: rename_func("RANDOM"), 709 exp.SHA: rename_func("SHA1"), 710 exp.SHA2: sha256_sql, 711 exp.Split: rename_func("STR_SPLIT"), 712 exp.SortArray: _sort_array_sql, 713 exp.StrPosition: strposition_sql, 714 exp.StrToUnix: lambda self, e: self.func( 715 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 716 ), 717 exp.Struct: _struct_sql, 718 exp.Transform: rename_func("LIST_TRANSFORM"), 719 exp.TimeAdd: _date_delta_sql, 720 exp.Time: no_time_sql, 721 exp.TimeDiff: _timediff_sql, 722 exp.Timestamp: no_timestamp_sql, 723 exp.TimestampDiff: lambda self, e: self.func( 724 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 725 ), 726 exp.TimestampTrunc: timestamptrunc_sql(), 727 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 728 exp.TimeStrToTime: timestrtotime_sql, 729 exp.TimeStrToUnix: lambda self, e: self.func( 730 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 731 ), 732 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 733 exp.TimeToUnix: rename_func("EPOCH"), 734 exp.TsOrDiToDi: lambda self, 735 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 736 exp.TsOrDsAdd: _date_delta_sql, 737 exp.TsOrDsDiff: lambda self, e: self.func( 738 "DATE_DIFF", 739 f"'{e.args.get('unit') or 'DAY'}'", 740 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 741 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 742 ), 743 exp.UnixToStr: lambda self, e: self.func( 744 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 745 ), 746 exp.DatetimeTrunc: lambda self, e: self.func( 747 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 748 ), 749 exp.UnixToTime: _unix_to_time_sql, 750 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 751 exp.VariancePop: rename_func("VAR_POP"), 752 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 753 exp.Xor: bool_xor_sql, 754 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 755 rename_func("LEVENSHTEIN") 756 ), 757 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 758 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 759 exp.DateBin: rename_func("TIME_BUCKET"), 760 } 761 762 SUPPORTED_JSON_PATH_PARTS = { 763 exp.JSONPathKey, 764 exp.JSONPathRoot, 765 exp.JSONPathSubscript, 766 exp.JSONPathWildcard, 767 } 768 769 TYPE_MAPPING = { 770 **generator.Generator.TYPE_MAPPING, 771 exp.DataType.Type.BINARY: "BLOB", 772 exp.DataType.Type.BPCHAR: "TEXT", 773 exp.DataType.Type.CHAR: "TEXT", 774 exp.DataType.Type.DATETIME: "TIMESTAMP", 775 exp.DataType.Type.FLOAT: "REAL", 776 exp.DataType.Type.JSONB: "JSON", 777 exp.DataType.Type.NCHAR: "TEXT", 778 exp.DataType.Type.NVARCHAR: "TEXT", 779 exp.DataType.Type.UINT: "UINTEGER", 780 exp.DataType.Type.VARBINARY: "BLOB", 781 exp.DataType.Type.ROWVERSION: "BLOB", 782 exp.DataType.Type.VARCHAR: "TEXT", 783 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 784 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 785 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 786 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 787 } 788 789 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 790 RESERVED_KEYWORDS = { 791 "array", 792 "analyse", 793 "union", 794 "all", 795 "when", 796 "in_p", 797 "default", 798 "create_p", 799 "window", 800 "asymmetric", 801 "to", 802 "else", 803 "localtime", 804 "from", 805 "end_p", 806 "select", 807 "current_date", 808 "foreign", 809 "with", 810 "grant", 811 "session_user", 812 "or", 813 "except", 814 "references", 815 "fetch", 816 "limit", 817 "group_p", 818 "leading", 819 "into", 820 "collate", 821 "offset", 822 "do", 823 "then", 824 "localtimestamp", 825 "check_p", 826 "lateral_p", 827 "current_role", 828 "where", 829 "asc_p", 830 "placing", 831 "desc_p", 832 "user", 833 "unique", 834 "initially", 835 "column", 836 "both", 837 "some", 838 "as", 839 "any", 840 "only", 841 "deferrable", 842 "null_p", 843 "current_time", 844 "true_p", 845 "table", 846 "case", 847 "trailing", 848 "variadic", 849 "for", 850 "on", 851 "distinct", 852 "false_p", 853 "not", 854 "constraint", 855 "current_timestamp", 856 "returning", 857 "primary", 858 "intersect", 859 "having", 860 "analyze", 861 "current_user", 862 "and", 863 "cast", 864 "symmetric", 865 "using", 866 "order", 867 "current_catalog", 868 } 869 870 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 871 872 # DuckDB doesn't generally support CREATE TABLE .. properties 873 # https://duckdb.org/docs/sql/statements/create_table.html 874 PROPERTIES_LOCATION = { 875 prop: exp.Properties.Location.UNSUPPORTED 876 for prop in generator.Generator.PROPERTIES_LOCATION 877 } 878 879 # There are a few exceptions (e.g. temporary tables) which are supported or 880 # can be transpiled to DuckDB, so we explicitly override them accordingly 881 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 882 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 883 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 884 885 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 886 exp.FirstValue, 887 exp.Lag, 888 exp.LastValue, 889 exp.Lead, 890 exp.NthValue, 891 ) 892 893 def show_sql(self, expression: exp.Show) -> str: 894 return f"SHOW {expression.name}" 895 896 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 897 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 898 899 def strtotime_sql(self, expression: exp.StrToTime) -> str: 900 if expression.args.get("safe"): 901 formatted_time = self.format_time(expression) 902 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 903 return str_to_time_sql(self, expression) 904 905 def strtodate_sql(self, expression: exp.StrToDate) -> str: 906 if expression.args.get("safe"): 907 formatted_time = self.format_time(expression) 908 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 909 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 910 911 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 912 arg = expression.this 913 if expression.args.get("safe"): 914 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 915 return self.func("JSON", arg) 916 917 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 918 nano = expression.args.get("nano") 919 if nano is not None: 920 expression.set( 921 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 922 ) 923 924 return rename_func("MAKE_TIME")(self, expression) 925 926 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 927 sec = expression.args["sec"] 928 929 milli = expression.args.get("milli") 930 if milli is not None: 931 sec += milli.pop() / exp.Literal.number(1000.0) 932 933 nano = expression.args.get("nano") 934 if nano is not None: 935 sec += nano.pop() / exp.Literal.number(1000000000.0) 936 937 if milli or nano: 938 expression.set("sec", sec) 939 940 return rename_func("MAKE_TIMESTAMP")(self, expression) 941 942 def tablesample_sql( 943 self, 944 expression: exp.TableSample, 945 tablesample_keyword: t.Optional[str] = None, 946 ) -> str: 947 if not isinstance(expression.parent, exp.Select): 948 # This sample clause only applies to a single source, not the entire resulting relation 949 tablesample_keyword = "TABLESAMPLE" 950 951 if expression.args.get("size"): 952 method = expression.args.get("method") 953 if method and method.name.upper() != "RESERVOIR": 954 self.unsupported( 955 f"Sampling method {method} is not supported with a discrete sample count, " 956 "defaulting to reservoir sampling" 957 ) 958 expression.set("method", exp.var("RESERVOIR")) 959 960 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 961 962 def interval_sql(self, expression: exp.Interval) -> str: 963 multiplier: t.Optional[int] = None 964 unit = expression.text("unit").lower() 965 966 if unit.startswith("week"): 967 multiplier = 7 968 if unit.startswith("quarter"): 969 multiplier = 90 970 971 if multiplier: 972 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 973 974 return super().interval_sql(expression) 975 976 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 977 if isinstance(expression.parent, exp.UserDefinedFunction): 978 return self.sql(expression, "this") 979 return super().columndef_sql(expression, sep) 980 981 def join_sql(self, expression: exp.Join) -> str: 982 if ( 983 expression.side == "LEFT" 984 and not expression.args.get("on") 985 and isinstance(expression.this, exp.Unnest) 986 ): 987 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 988 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 989 return super().join_sql(expression.on(exp.true())) 990 991 return super().join_sql(expression) 992 993 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 994 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 995 if expression.args.get("is_end_exclusive"): 996 return rename_func("RANGE")(self, expression) 997 998 return self.function_fallback_sql(expression) 999 1000 def countif_sql(self, expression: exp.CountIf) -> str: 1001 if self.dialect.version >= Version("1.2"): 1002 return self.function_fallback_sql(expression) 1003 1004 # https://github.com/tobymao/sqlglot/pull/4749 1005 return count_if_to_sum(self, expression) 1006 1007 def bracket_sql(self, expression: exp.Bracket) -> str: 1008 if self.dialect.version >= Version("1.2"): 1009 return super().bracket_sql(expression) 1010 1011 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1012 this = expression.this 1013 if isinstance(this, exp.Array): 1014 this.replace(exp.paren(this)) 1015 1016 bracket = super().bracket_sql(expression) 1017 1018 if not expression.args.get("returns_list_for_maps"): 1019 if not this.type: 1020 from sqlglot.optimizer.annotate_types import annotate_types 1021 1022 this = annotate_types(this, dialect=self.dialect) 1023 1024 if this.is_type(exp.DataType.Type.MAP): 1025 bracket = f"({bracket})[1]" 1026 1027 return bracket 1028 1029 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1030 expression_sql = self.sql(expression, "expression") 1031 1032 func = expression.this 1033 if isinstance(func, exp.PERCENTILES): 1034 # Make the order key the first arg and slide the fraction to the right 1035 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1036 order_col = expression.find(exp.Ordered) 1037 if order_col: 1038 func.set("expression", func.this) 1039 func.set("this", order_col.this) 1040 1041 this = self.sql(expression, "this").rstrip(")") 1042 1043 return f"{this}{expression_sql})" 1044 1045 def length_sql(self, expression: exp.Length) -> str: 1046 arg = expression.this 1047 1048 # Dialects like BQ and Snowflake also accept binary values as args, so 1049 # DDB will attempt to infer the type or resort to case/when resolution 1050 if not expression.args.get("binary") or arg.is_string: 1051 return self.func("LENGTH", arg) 1052 1053 if not arg.type: 1054 from sqlglot.optimizer.annotate_types import annotate_types 1055 1056 arg = annotate_types(arg, dialect=self.dialect) 1057 1058 if arg.is_type(*exp.DataType.TEXT_TYPES): 1059 return self.func("LENGTH", arg) 1060 1061 # We need these casts to make duckdb's static type checker happy 1062 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1063 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1064 1065 case = ( 1066 exp.case(self.func("TYPEOF", arg)) 1067 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1068 .else_( 1069 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1070 ) # anonymous to break length_sql recursion 1071 ) 1072 1073 return self.sql(case) 1074 1075 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1076 this = expression.this 1077 key = expression.args.get("key") 1078 key_sql = key.name if isinstance(key, exp.Expression) else "" 1079 value_sql = self.sql(expression, "value") 1080 1081 kv_sql = f"{key_sql} := {value_sql}" 1082 1083 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1084 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1085 if isinstance(this, exp.Struct) and not this.expressions: 1086 return self.func("STRUCT_PACK", kv_sql) 1087 1088 return self.func("STRUCT_INSERT", this, kv_sql) 1089 1090 def unnest_sql(self, expression: exp.Unnest) -> str: 1091 explode_array = expression.args.get("explode_array") 1092 if explode_array: 1093 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1094 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1095 expression.expressions.append( 1096 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1097 ) 1098 1099 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1100 alias = expression.args.get("alias") 1101 if alias: 1102 expression.set("alias", None) 1103 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1104 1105 unnest_sql = super().unnest_sql(expression) 1106 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1107 return self.sql(select) 1108 1109 return super().unnest_sql(expression) 1110 1111 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1112 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1113 # DuckDB should render IGNORE NULLS only for the general-purpose 1114 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1115 return super().ignorenulls_sql(expression) 1116 1117 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1118 return self.sql(expression, "this") 1119 1120 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1121 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1122 # DuckDB should render RESPECT NULLS only for the general-purpose 1123 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1124 return super().respectnulls_sql(expression) 1125 1126 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1127 return self.sql(expression, "this") 1128 1129 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1130 this = self.sql(expression, "this") 1131 null_text = self.sql(expression, "null") 1132 1133 if null_text: 1134 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1135 1136 return self.func("ARRAY_TO_STRING", this, expression.expression) 1137 1138 @unsupported_args("position", "occurrence") 1139 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1140 group = expression.args.get("group") 1141 params = expression.args.get("parameters") 1142 1143 # Do not render group if there is no following argument, 1144 # and it's the default value for this dialect 1145 if ( 1146 not params 1147 and group 1148 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1149 ): 1150 group = None 1151 return self.func( 1152 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1153 ) 1154 1155 @unsupported_args("culture") 1156 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1157 fmt = expression.args.get("format") 1158 if fmt and fmt.is_int: 1159 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1160 1161 self.unsupported("Only integer formats are supported by NumberToStr") 1162 return self.function_fallback_sql(expression) 1163 1164 def autoincrementcolumnconstraint_sql(self, _) -> str: 1165 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1166 return ""
280class DuckDB(Dialect): 281 NULL_ORDERING = "nulls_are_last" 282 SUPPORTS_USER_DEFINED_TYPES = True 283 SAFE_DIVISION = True 284 INDEX_OFFSET = 1 285 CONCAT_COALESCE = True 286 SUPPORTS_ORDER_BY_ALL = True 287 SUPPORTS_FIXED_SIZE_ARRAYS = True 288 STRICT_JSON_PATH_SYNTAX = False 289 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 290 291 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 292 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 293 294 DATE_PART_MAPPING = { 295 **Dialect.DATE_PART_MAPPING, 296 "DAYOFWEEKISO": "ISODOW", 297 } 298 DATE_PART_MAPPING.pop("WEEKDAY") 299 300 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 301 if isinstance(path, exp.Literal): 302 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 303 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 304 # This check ensures we'll avoid trying to parse these as JSON paths, which can 305 # either result in a noisy warning or in an invalid representation of the path. 306 path_text = path.name 307 if path_text.startswith("/") or "[#" in path_text: 308 return path 309 310 return super().to_json_path(path) 311 312 class Tokenizer(tokens.Tokenizer): 313 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 314 HEREDOC_STRINGS = ["$"] 315 316 HEREDOC_TAG_IS_IDENTIFIER = True 317 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 318 319 KEYWORDS = { 320 **tokens.Tokenizer.KEYWORDS, 321 "//": TokenType.DIV, 322 "**": TokenType.DSTAR, 323 "^@": TokenType.CARET_AT, 324 "@>": TokenType.AT_GT, 325 "<@": TokenType.LT_AT, 326 "ATTACH": TokenType.ATTACH, 327 "BINARY": TokenType.VARBINARY, 328 "BITSTRING": TokenType.BIT, 329 "BPCHAR": TokenType.TEXT, 330 "CHAR": TokenType.TEXT, 331 "DATETIME": TokenType.TIMESTAMPNTZ, 332 "DETACH": TokenType.DETACH, 333 "EXCLUDE": TokenType.EXCEPT, 334 "LOGICAL": TokenType.BOOLEAN, 335 "ONLY": TokenType.ONLY, 336 "PIVOT_WIDER": TokenType.PIVOT, 337 "POSITIONAL": TokenType.POSITIONAL, 338 "SIGNED": TokenType.INT, 339 "STRING": TokenType.TEXT, 340 "SUMMARIZE": TokenType.SUMMARIZE, 341 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 342 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 343 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 344 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 345 "TIMESTAMP_US": TokenType.TIMESTAMP, 346 "UBIGINT": TokenType.UBIGINT, 347 "UINTEGER": TokenType.UINT, 348 "USMALLINT": TokenType.USMALLINT, 349 "UTINYINT": TokenType.UTINYINT, 350 "VARCHAR": TokenType.TEXT, 351 } 352 KEYWORDS.pop("/*+") 353 354 SINGLE_TOKENS = { 355 **tokens.Tokenizer.SINGLE_TOKENS, 356 "$": TokenType.PARAMETER, 357 } 358 359 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 360 361 class Parser(parser.Parser): 362 BITWISE = { 363 **parser.Parser.BITWISE, 364 TokenType.TILDA: exp.RegexpLike, 365 } 366 BITWISE.pop(TokenType.CARET) 367 368 RANGE_PARSERS = { 369 **parser.Parser.RANGE_PARSERS, 370 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 371 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 372 } 373 374 EXPONENT = { 375 **parser.Parser.EXPONENT, 376 TokenType.CARET: exp.Pow, 377 TokenType.DSTAR: exp.Pow, 378 } 379 380 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 381 382 SHOW_PARSERS = { 383 "TABLES": _show_parser("TABLES"), 384 "ALL TABLES": _show_parser("ALL TABLES"), 385 } 386 387 FUNCTIONS = { 388 **parser.Parser.FUNCTIONS, 389 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 390 "ARRAY_SORT": exp.SortArray.from_arg_list, 391 "DATEDIFF": _build_date_diff, 392 "DATE_DIFF": _build_date_diff, 393 "DATE_TRUNC": date_trunc_to_time, 394 "DATETRUNC": date_trunc_to_time, 395 "DECODE": lambda args: exp.Decode( 396 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 397 ), 398 "EDITDIST3": exp.Levenshtein.from_arg_list, 399 "ENCODE": lambda args: exp.Encode( 400 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 401 ), 402 "EPOCH": exp.TimeToUnix.from_arg_list, 403 "EPOCH_MS": lambda args: exp.UnixToTime( 404 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 405 ), 406 "GENERATE_SERIES": _build_generate_series(), 407 "JSON": exp.ParseJSON.from_arg_list, 408 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 409 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 410 "LIST_HAS": exp.ArrayContains.from_arg_list, 411 "LIST_REVERSE_SORT": _build_sort_array_desc, 412 "LIST_SORT": exp.SortArray.from_arg_list, 413 "LIST_VALUE": lambda args: exp.Array(expressions=args), 414 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 415 "MAKE_TIMESTAMP": _build_make_timestamp, 416 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 417 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 418 "RANGE": _build_generate_series(end_exclusive=True), 419 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 420 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 421 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 422 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 423 this=seq_get(args, 0), 424 expression=seq_get(args, 1), 425 replacement=seq_get(args, 2), 426 modifiers=seq_get(args, 3), 427 ), 428 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 429 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 430 "STRING_SPLIT": exp.Split.from_arg_list, 431 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 432 "STRING_TO_ARRAY": exp.Split.from_arg_list, 433 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 434 "STRUCT_PACK": exp.Struct.from_arg_list, 435 "STR_SPLIT": exp.Split.from_arg_list, 436 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 437 "TIME_BUCKET": exp.DateBin.from_arg_list, 438 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 439 "UNNEST": exp.Explode.from_arg_list, 440 "XOR": binary_from_function(exp.BitwiseXor), 441 } 442 443 FUNCTIONS.pop("DATE_SUB") 444 FUNCTIONS.pop("GLOB") 445 446 FUNCTION_PARSERS = { 447 **parser.Parser.FUNCTION_PARSERS, 448 **dict.fromkeys( 449 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 450 ), 451 } 452 FUNCTION_PARSERS.pop("DECODE") 453 454 NO_PAREN_FUNCTION_PARSERS = { 455 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 456 "MAP": lambda self: self._parse_map(), 457 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 458 } 459 460 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 461 TokenType.SEMI, 462 TokenType.ANTI, 463 } 464 465 PLACEHOLDER_PARSERS = { 466 **parser.Parser.PLACEHOLDER_PARSERS, 467 TokenType.PARAMETER: lambda self: ( 468 self.expression(exp.Placeholder, this=self._prev.text) 469 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 470 else None 471 ), 472 } 473 474 TYPE_CONVERTERS = { 475 # https://duckdb.org/docs/sql/data_types/numeric 476 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 477 # https://duckdb.org/docs/sql/data_types/text 478 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 479 } 480 481 STATEMENT_PARSERS = { 482 **parser.Parser.STATEMENT_PARSERS, 483 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 484 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 485 TokenType.SHOW: lambda self: self._parse_show(), 486 } 487 488 def _parse_expression(self) -> t.Optional[exp.Expression]: 489 # DuckDB supports prefix aliases, e.g. foo: 1 490 if self._next and self._next.token_type == TokenType.COLON: 491 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 492 self._match(TokenType.COLON) 493 comments = self._prev_comments or [] 494 495 this = self._parse_assignment() 496 if isinstance(this, exp.Expression): 497 # Moves the comment next to the alias in `alias: expr /* comment */` 498 comments += this.pop_comments() or [] 499 500 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 501 502 return super()._parse_expression() 503 504 def _parse_table( 505 self, 506 schema: bool = False, 507 joins: bool = False, 508 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 509 parse_bracket: bool = False, 510 is_db_reference: bool = False, 511 parse_partition: bool = False, 512 ) -> t.Optional[exp.Expression]: 513 # DuckDB supports prefix aliases, e.g. FROM foo: bar 514 if self._next and self._next.token_type == TokenType.COLON: 515 alias = self._parse_table_alias( 516 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 517 ) 518 self._match(TokenType.COLON) 519 comments = self._prev_comments or [] 520 else: 521 alias = None 522 comments = [] 523 524 table = super()._parse_table( 525 schema=schema, 526 joins=joins, 527 alias_tokens=alias_tokens, 528 parse_bracket=parse_bracket, 529 is_db_reference=is_db_reference, 530 parse_partition=parse_partition, 531 ) 532 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 533 # Moves the comment next to the alias in `alias: table /* comment */` 534 comments += table.pop_comments() or [] 535 alias.comments = alias.pop_comments() + comments 536 table.set("alias", alias) 537 538 return table 539 540 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 541 # https://duckdb.org/docs/sql/samples.html 542 sample = super()._parse_table_sample(as_modifier=as_modifier) 543 if sample and not sample.args.get("method"): 544 if sample.args.get("size"): 545 sample.set("method", exp.var("RESERVOIR")) 546 else: 547 sample.set("method", exp.var("SYSTEM")) 548 549 return sample 550 551 def _parse_bracket( 552 self, this: t.Optional[exp.Expression] = None 553 ) -> t.Optional[exp.Expression]: 554 bracket = super()._parse_bracket(this) 555 556 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 557 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 558 bracket.set("returns_list_for_maps", True) 559 560 return bracket 561 562 def _parse_map(self) -> exp.ToMap | exp.Map: 563 if self._match(TokenType.L_BRACE, advance=False): 564 return self.expression(exp.ToMap, this=self._parse_bracket()) 565 566 args = self._parse_wrapped_csv(self._parse_assignment) 567 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 568 569 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 570 return self._parse_field_def() 571 572 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 573 if len(aggregations) == 1: 574 return super()._pivot_column_names(aggregations) 575 return pivot_column_names(aggregations, dialect="duckdb") 576 577 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 578 def _parse_attach_option() -> exp.AttachOption: 579 return self.expression( 580 exp.AttachOption, 581 this=self._parse_var(any_token=True), 582 expression=self._parse_field(any_token=True), 583 ) 584 585 self._match(TokenType.DATABASE) 586 exists = self._parse_exists(not_=is_attach) 587 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 588 589 if self._match(TokenType.L_PAREN, advance=False): 590 expressions = self._parse_wrapped_csv(_parse_attach_option) 591 else: 592 expressions = None 593 594 return ( 595 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 596 if is_attach 597 else self.expression(exp.Detach, this=this, exists=exists) 598 ) 599 600 def _parse_show_duckdb(self, this: str) -> exp.Show: 601 return self.expression(exp.Show, this=this) 602 603 class Generator(generator.Generator): 604 PARAMETER_TOKEN = "$" 605 NAMED_PLACEHOLDER_TOKEN = "$" 606 JOIN_HINTS = False 607 TABLE_HINTS = False 608 QUERY_HINTS = False 609 LIMIT_FETCH = "LIMIT" 610 STRUCT_DELIMITER = ("(", ")") 611 RENAME_TABLE_WITH_DB = False 612 NVL2_SUPPORTED = False 613 SEMI_ANTI_JOIN_WITH_SIDE = False 614 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 615 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 616 LAST_DAY_SUPPORTS_DATE_PART = False 617 JSON_KEY_VALUE_PAIR_SEP = "," 618 IGNORE_NULLS_IN_FUNC = True 619 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 620 SUPPORTS_CREATE_TABLE_LIKE = False 621 MULTI_ARG_DISTINCT = False 622 CAN_IMPLEMENT_ARRAY_ANY = True 623 SUPPORTS_TO_NUMBER = False 624 SUPPORTS_WINDOW_EXCLUDE = True 625 COPY_HAS_INTO_KEYWORD = False 626 STAR_EXCEPT = "EXCLUDE" 627 PAD_FILL_PATTERN_IS_REQUIRED = True 628 ARRAY_CONCAT_IS_VAR_LEN = False 629 ARRAY_SIZE_DIM_REQUIRED = False 630 NORMALIZE_EXTRACT_DATE_PARTS = True 631 632 TRANSFORMS = { 633 **generator.Generator.TRANSFORMS, 634 exp.ApproxDistinct: approx_count_distinct_sql, 635 exp.Array: inline_array_unless_query, 636 exp.ArrayFilter: rename_func("LIST_FILTER"), 637 exp.ArrayRemove: remove_from_array_using_filter, 638 exp.ArraySort: _array_sort_sql, 639 exp.ArraySum: rename_func("LIST_SUM"), 640 exp.BitwiseXor: rename_func("XOR"), 641 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 642 exp.CurrentDate: lambda *_: "CURRENT_DATE", 643 exp.CurrentTime: lambda *_: "CURRENT_TIME", 644 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 645 exp.DayOfMonth: rename_func("DAYOFMONTH"), 646 exp.DayOfWeek: rename_func("DAYOFWEEK"), 647 exp.DayOfWeekIso: rename_func("ISODOW"), 648 exp.DayOfYear: rename_func("DAYOFYEAR"), 649 exp.DataType: _datatype_sql, 650 exp.Date: _date_sql, 651 exp.DateAdd: _date_delta_sql, 652 exp.DateFromParts: rename_func("MAKE_DATE"), 653 exp.DateSub: _date_delta_sql, 654 exp.DateDiff: _date_diff_sql, 655 exp.DateStrToDate: datestrtodate_sql, 656 exp.Datetime: no_datetime_sql, 657 exp.DatetimeSub: _date_delta_sql, 658 exp.DatetimeAdd: _date_delta_sql, 659 exp.DateToDi: lambda self, 660 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 661 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 662 exp.DiToDate: lambda self, 663 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 664 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 665 exp.GenerateDateArray: _generate_datetime_array_sql, 666 exp.GenerateTimestampArray: _generate_datetime_array_sql, 667 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 668 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 669 exp.Explode: rename_func("UNNEST"), 670 exp.IntDiv: lambda self, e: self.binary(e, "//"), 671 exp.IsInf: rename_func("ISINF"), 672 exp.IsNan: rename_func("ISNAN"), 673 exp.JSONBExists: rename_func("JSON_EXISTS"), 674 exp.JSONExtract: _arrow_json_extract_sql, 675 exp.JSONExtractArray: _json_extract_value_array_sql, 676 exp.JSONExtractScalar: _arrow_json_extract_sql, 677 exp.JSONFormat: _json_format_sql, 678 exp.JSONValueArray: _json_extract_value_array_sql, 679 exp.Lateral: explode_to_unnest_sql, 680 exp.LogicalOr: rename_func("BOOL_OR"), 681 exp.LogicalAnd: rename_func("BOOL_AND"), 682 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 683 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 684 exp.MonthsBetween: lambda self, e: self.func( 685 "DATEDIFF", 686 "'month'", 687 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 688 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 689 ), 690 exp.PercentileCont: rename_func("QUANTILE_CONT"), 691 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 692 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 693 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 694 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 695 exp.RegexpReplace: lambda self, e: self.func( 696 "REGEXP_REPLACE", 697 e.this, 698 e.expression, 699 e.args.get("replacement"), 700 e.args.get("modifiers"), 701 ), 702 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 703 exp.RegexpILike: lambda self, e: self.func( 704 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 705 ), 706 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 707 exp.Return: lambda self, e: self.sql(e, "this"), 708 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 709 exp.Rand: rename_func("RANDOM"), 710 exp.SHA: rename_func("SHA1"), 711 exp.SHA2: sha256_sql, 712 exp.Split: rename_func("STR_SPLIT"), 713 exp.SortArray: _sort_array_sql, 714 exp.StrPosition: strposition_sql, 715 exp.StrToUnix: lambda self, e: self.func( 716 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 717 ), 718 exp.Struct: _struct_sql, 719 exp.Transform: rename_func("LIST_TRANSFORM"), 720 exp.TimeAdd: _date_delta_sql, 721 exp.Time: no_time_sql, 722 exp.TimeDiff: _timediff_sql, 723 exp.Timestamp: no_timestamp_sql, 724 exp.TimestampDiff: lambda self, e: self.func( 725 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 726 ), 727 exp.TimestampTrunc: timestamptrunc_sql(), 728 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 729 exp.TimeStrToTime: timestrtotime_sql, 730 exp.TimeStrToUnix: lambda self, e: self.func( 731 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 732 ), 733 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 734 exp.TimeToUnix: rename_func("EPOCH"), 735 exp.TsOrDiToDi: lambda self, 736 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 737 exp.TsOrDsAdd: _date_delta_sql, 738 exp.TsOrDsDiff: lambda self, e: self.func( 739 "DATE_DIFF", 740 f"'{e.args.get('unit') or 'DAY'}'", 741 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 742 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 743 ), 744 exp.UnixToStr: lambda self, e: self.func( 745 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 746 ), 747 exp.DatetimeTrunc: lambda self, e: self.func( 748 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 749 ), 750 exp.UnixToTime: _unix_to_time_sql, 751 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 752 exp.VariancePop: rename_func("VAR_POP"), 753 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 754 exp.Xor: bool_xor_sql, 755 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 756 rename_func("LEVENSHTEIN") 757 ), 758 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 759 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 760 exp.DateBin: rename_func("TIME_BUCKET"), 761 } 762 763 SUPPORTED_JSON_PATH_PARTS = { 764 exp.JSONPathKey, 765 exp.JSONPathRoot, 766 exp.JSONPathSubscript, 767 exp.JSONPathWildcard, 768 } 769 770 TYPE_MAPPING = { 771 **generator.Generator.TYPE_MAPPING, 772 exp.DataType.Type.BINARY: "BLOB", 773 exp.DataType.Type.BPCHAR: "TEXT", 774 exp.DataType.Type.CHAR: "TEXT", 775 exp.DataType.Type.DATETIME: "TIMESTAMP", 776 exp.DataType.Type.FLOAT: "REAL", 777 exp.DataType.Type.JSONB: "JSON", 778 exp.DataType.Type.NCHAR: "TEXT", 779 exp.DataType.Type.NVARCHAR: "TEXT", 780 exp.DataType.Type.UINT: "UINTEGER", 781 exp.DataType.Type.VARBINARY: "BLOB", 782 exp.DataType.Type.ROWVERSION: "BLOB", 783 exp.DataType.Type.VARCHAR: "TEXT", 784 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 785 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 786 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 787 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 788 } 789 790 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 791 RESERVED_KEYWORDS = { 792 "array", 793 "analyse", 794 "union", 795 "all", 796 "when", 797 "in_p", 798 "default", 799 "create_p", 800 "window", 801 "asymmetric", 802 "to", 803 "else", 804 "localtime", 805 "from", 806 "end_p", 807 "select", 808 "current_date", 809 "foreign", 810 "with", 811 "grant", 812 "session_user", 813 "or", 814 "except", 815 "references", 816 "fetch", 817 "limit", 818 "group_p", 819 "leading", 820 "into", 821 "collate", 822 "offset", 823 "do", 824 "then", 825 "localtimestamp", 826 "check_p", 827 "lateral_p", 828 "current_role", 829 "where", 830 "asc_p", 831 "placing", 832 "desc_p", 833 "user", 834 "unique", 835 "initially", 836 "column", 837 "both", 838 "some", 839 "as", 840 "any", 841 "only", 842 "deferrable", 843 "null_p", 844 "current_time", 845 "true_p", 846 "table", 847 "case", 848 "trailing", 849 "variadic", 850 "for", 851 "on", 852 "distinct", 853 "false_p", 854 "not", 855 "constraint", 856 "current_timestamp", 857 "returning", 858 "primary", 859 "intersect", 860 "having", 861 "analyze", 862 "current_user", 863 "and", 864 "cast", 865 "symmetric", 866 "using", 867 "order", 868 "current_catalog", 869 } 870 871 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 872 873 # DuckDB doesn't generally support CREATE TABLE .. properties 874 # https://duckdb.org/docs/sql/statements/create_table.html 875 PROPERTIES_LOCATION = { 876 prop: exp.Properties.Location.UNSUPPORTED 877 for prop in generator.Generator.PROPERTIES_LOCATION 878 } 879 880 # There are a few exceptions (e.g. temporary tables) which are supported or 881 # can be transpiled to DuckDB, so we explicitly override them accordingly 882 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 883 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 884 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 885 886 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 887 exp.FirstValue, 888 exp.Lag, 889 exp.LastValue, 890 exp.Lead, 891 exp.NthValue, 892 ) 893 894 def show_sql(self, expression: exp.Show) -> str: 895 return f"SHOW {expression.name}" 896 897 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 898 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 899 900 def strtotime_sql(self, expression: exp.StrToTime) -> str: 901 if expression.args.get("safe"): 902 formatted_time = self.format_time(expression) 903 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 904 return str_to_time_sql(self, expression) 905 906 def strtodate_sql(self, expression: exp.StrToDate) -> str: 907 if expression.args.get("safe"): 908 formatted_time = self.format_time(expression) 909 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 910 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 911 912 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 913 arg = expression.this 914 if expression.args.get("safe"): 915 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 916 return self.func("JSON", arg) 917 918 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 919 nano = expression.args.get("nano") 920 if nano is not None: 921 expression.set( 922 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 923 ) 924 925 return rename_func("MAKE_TIME")(self, expression) 926 927 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 928 sec = expression.args["sec"] 929 930 milli = expression.args.get("milli") 931 if milli is not None: 932 sec += milli.pop() / exp.Literal.number(1000.0) 933 934 nano = expression.args.get("nano") 935 if nano is not None: 936 sec += nano.pop() / exp.Literal.number(1000000000.0) 937 938 if milli or nano: 939 expression.set("sec", sec) 940 941 return rename_func("MAKE_TIMESTAMP")(self, expression) 942 943 def tablesample_sql( 944 self, 945 expression: exp.TableSample, 946 tablesample_keyword: t.Optional[str] = None, 947 ) -> str: 948 if not isinstance(expression.parent, exp.Select): 949 # This sample clause only applies to a single source, not the entire resulting relation 950 tablesample_keyword = "TABLESAMPLE" 951 952 if expression.args.get("size"): 953 method = expression.args.get("method") 954 if method and method.name.upper() != "RESERVOIR": 955 self.unsupported( 956 f"Sampling method {method} is not supported with a discrete sample count, " 957 "defaulting to reservoir sampling" 958 ) 959 expression.set("method", exp.var("RESERVOIR")) 960 961 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 962 963 def interval_sql(self, expression: exp.Interval) -> str: 964 multiplier: t.Optional[int] = None 965 unit = expression.text("unit").lower() 966 967 if unit.startswith("week"): 968 multiplier = 7 969 if unit.startswith("quarter"): 970 multiplier = 90 971 972 if multiplier: 973 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 974 975 return super().interval_sql(expression) 976 977 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 978 if isinstance(expression.parent, exp.UserDefinedFunction): 979 return self.sql(expression, "this") 980 return super().columndef_sql(expression, sep) 981 982 def join_sql(self, expression: exp.Join) -> str: 983 if ( 984 expression.side == "LEFT" 985 and not expression.args.get("on") 986 and isinstance(expression.this, exp.Unnest) 987 ): 988 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 989 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 990 return super().join_sql(expression.on(exp.true())) 991 992 return super().join_sql(expression) 993 994 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 995 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 996 if expression.args.get("is_end_exclusive"): 997 return rename_func("RANGE")(self, expression) 998 999 return self.function_fallback_sql(expression) 1000 1001 def countif_sql(self, expression: exp.CountIf) -> str: 1002 if self.dialect.version >= Version("1.2"): 1003 return self.function_fallback_sql(expression) 1004 1005 # https://github.com/tobymao/sqlglot/pull/4749 1006 return count_if_to_sum(self, expression) 1007 1008 def bracket_sql(self, expression: exp.Bracket) -> str: 1009 if self.dialect.version >= Version("1.2"): 1010 return super().bracket_sql(expression) 1011 1012 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1013 this = expression.this 1014 if isinstance(this, exp.Array): 1015 this.replace(exp.paren(this)) 1016 1017 bracket = super().bracket_sql(expression) 1018 1019 if not expression.args.get("returns_list_for_maps"): 1020 if not this.type: 1021 from sqlglot.optimizer.annotate_types import annotate_types 1022 1023 this = annotate_types(this, dialect=self.dialect) 1024 1025 if this.is_type(exp.DataType.Type.MAP): 1026 bracket = f"({bracket})[1]" 1027 1028 return bracket 1029 1030 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1031 expression_sql = self.sql(expression, "expression") 1032 1033 func = expression.this 1034 if isinstance(func, exp.PERCENTILES): 1035 # Make the order key the first arg and slide the fraction to the right 1036 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1037 order_col = expression.find(exp.Ordered) 1038 if order_col: 1039 func.set("expression", func.this) 1040 func.set("this", order_col.this) 1041 1042 this = self.sql(expression, "this").rstrip(")") 1043 1044 return f"{this}{expression_sql})" 1045 1046 def length_sql(self, expression: exp.Length) -> str: 1047 arg = expression.this 1048 1049 # Dialects like BQ and Snowflake also accept binary values as args, so 1050 # DDB will attempt to infer the type or resort to case/when resolution 1051 if not expression.args.get("binary") or arg.is_string: 1052 return self.func("LENGTH", arg) 1053 1054 if not arg.type: 1055 from sqlglot.optimizer.annotate_types import annotate_types 1056 1057 arg = annotate_types(arg, dialect=self.dialect) 1058 1059 if arg.is_type(*exp.DataType.TEXT_TYPES): 1060 return self.func("LENGTH", arg) 1061 1062 # We need these casts to make duckdb's static type checker happy 1063 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1064 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1065 1066 case = ( 1067 exp.case(self.func("TYPEOF", arg)) 1068 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1069 .else_( 1070 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1071 ) # anonymous to break length_sql recursion 1072 ) 1073 1074 return self.sql(case) 1075 1076 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1077 this = expression.this 1078 key = expression.args.get("key") 1079 key_sql = key.name if isinstance(key, exp.Expression) else "" 1080 value_sql = self.sql(expression, "value") 1081 1082 kv_sql = f"{key_sql} := {value_sql}" 1083 1084 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1085 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1086 if isinstance(this, exp.Struct) and not this.expressions: 1087 return self.func("STRUCT_PACK", kv_sql) 1088 1089 return self.func("STRUCT_INSERT", this, kv_sql) 1090 1091 def unnest_sql(self, expression: exp.Unnest) -> str: 1092 explode_array = expression.args.get("explode_array") 1093 if explode_array: 1094 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1095 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1096 expression.expressions.append( 1097 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1098 ) 1099 1100 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1101 alias = expression.args.get("alias") 1102 if alias: 1103 expression.set("alias", None) 1104 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1105 1106 unnest_sql = super().unnest_sql(expression) 1107 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1108 return self.sql(select) 1109 1110 return super().unnest_sql(expression) 1111 1112 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1113 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1114 # DuckDB should render IGNORE NULLS only for the general-purpose 1115 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1116 return super().ignorenulls_sql(expression) 1117 1118 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1119 return self.sql(expression, "this") 1120 1121 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1122 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1123 # DuckDB should render RESPECT NULLS only for the general-purpose 1124 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1125 return super().respectnulls_sql(expression) 1126 1127 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1128 return self.sql(expression, "this") 1129 1130 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1131 this = self.sql(expression, "this") 1132 null_text = self.sql(expression, "null") 1133 1134 if null_text: 1135 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1136 1137 return self.func("ARRAY_TO_STRING", this, expression.expression) 1138 1139 @unsupported_args("position", "occurrence") 1140 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1141 group = expression.args.get("group") 1142 params = expression.args.get("parameters") 1143 1144 # Do not render group if there is no following argument, 1145 # and it's the default value for this dialect 1146 if ( 1147 not params 1148 and group 1149 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1150 ): 1151 group = None 1152 return self.func( 1153 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1154 ) 1155 1156 @unsupported_args("culture") 1157 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1158 fmt = expression.args.get("format") 1159 if fmt and fmt.is_int: 1160 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1161 1162 self.unsupported("Only integer formats are supported by NumberToStr") 1163 return self.function_fallback_sql(expression) 1164 1165 def autoincrementcolumnconstraint_sql(self, _) -> str: 1166 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1167 return ""
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
A NULL
arg in CONCAT
yields NULL
by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
300 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 301 if isinstance(path, exp.Literal): 302 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 303 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 304 # This check ensures we'll avoid trying to parse these as JSON paths, which can 305 # either result in a noisy warning or in an invalid representation of the path. 306 path_text = path.name 307 if path_text.startswith("/") or "[#" in path_text: 308 return path 309 310 return super().to_json_path(path)
312 class Tokenizer(tokens.Tokenizer): 313 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 314 HEREDOC_STRINGS = ["$"] 315 316 HEREDOC_TAG_IS_IDENTIFIER = True 317 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 318 319 KEYWORDS = { 320 **tokens.Tokenizer.KEYWORDS, 321 "//": TokenType.DIV, 322 "**": TokenType.DSTAR, 323 "^@": TokenType.CARET_AT, 324 "@>": TokenType.AT_GT, 325 "<@": TokenType.LT_AT, 326 "ATTACH": TokenType.ATTACH, 327 "BINARY": TokenType.VARBINARY, 328 "BITSTRING": TokenType.BIT, 329 "BPCHAR": TokenType.TEXT, 330 "CHAR": TokenType.TEXT, 331 "DATETIME": TokenType.TIMESTAMPNTZ, 332 "DETACH": TokenType.DETACH, 333 "EXCLUDE": TokenType.EXCEPT, 334 "LOGICAL": TokenType.BOOLEAN, 335 "ONLY": TokenType.ONLY, 336 "PIVOT_WIDER": TokenType.PIVOT, 337 "POSITIONAL": TokenType.POSITIONAL, 338 "SIGNED": TokenType.INT, 339 "STRING": TokenType.TEXT, 340 "SUMMARIZE": TokenType.SUMMARIZE, 341 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 342 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 343 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 344 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 345 "TIMESTAMP_US": TokenType.TIMESTAMP, 346 "UBIGINT": TokenType.UBIGINT, 347 "UINTEGER": TokenType.UINT, 348 "USMALLINT": TokenType.USMALLINT, 349 "UTINYINT": TokenType.UTINYINT, 350 "VARCHAR": TokenType.TEXT, 351 } 352 KEYWORDS.pop("/*+") 353 354 SINGLE_TOKENS = { 355 **tokens.Tokenizer.SINGLE_TOKENS, 356 "$": TokenType.PARAMETER, 357 } 358 359 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
361 class Parser(parser.Parser): 362 BITWISE = { 363 **parser.Parser.BITWISE, 364 TokenType.TILDA: exp.RegexpLike, 365 } 366 BITWISE.pop(TokenType.CARET) 367 368 RANGE_PARSERS = { 369 **parser.Parser.RANGE_PARSERS, 370 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 371 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 372 } 373 374 EXPONENT = { 375 **parser.Parser.EXPONENT, 376 TokenType.CARET: exp.Pow, 377 TokenType.DSTAR: exp.Pow, 378 } 379 380 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 381 382 SHOW_PARSERS = { 383 "TABLES": _show_parser("TABLES"), 384 "ALL TABLES": _show_parser("ALL TABLES"), 385 } 386 387 FUNCTIONS = { 388 **parser.Parser.FUNCTIONS, 389 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 390 "ARRAY_SORT": exp.SortArray.from_arg_list, 391 "DATEDIFF": _build_date_diff, 392 "DATE_DIFF": _build_date_diff, 393 "DATE_TRUNC": date_trunc_to_time, 394 "DATETRUNC": date_trunc_to_time, 395 "DECODE": lambda args: exp.Decode( 396 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 397 ), 398 "EDITDIST3": exp.Levenshtein.from_arg_list, 399 "ENCODE": lambda args: exp.Encode( 400 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 401 ), 402 "EPOCH": exp.TimeToUnix.from_arg_list, 403 "EPOCH_MS": lambda args: exp.UnixToTime( 404 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 405 ), 406 "GENERATE_SERIES": _build_generate_series(), 407 "JSON": exp.ParseJSON.from_arg_list, 408 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 409 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 410 "LIST_HAS": exp.ArrayContains.from_arg_list, 411 "LIST_REVERSE_SORT": _build_sort_array_desc, 412 "LIST_SORT": exp.SortArray.from_arg_list, 413 "LIST_VALUE": lambda args: exp.Array(expressions=args), 414 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 415 "MAKE_TIMESTAMP": _build_make_timestamp, 416 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 417 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 418 "RANGE": _build_generate_series(end_exclusive=True), 419 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 420 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 421 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 422 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 423 this=seq_get(args, 0), 424 expression=seq_get(args, 1), 425 replacement=seq_get(args, 2), 426 modifiers=seq_get(args, 3), 427 ), 428 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 429 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 430 "STRING_SPLIT": exp.Split.from_arg_list, 431 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 432 "STRING_TO_ARRAY": exp.Split.from_arg_list, 433 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 434 "STRUCT_PACK": exp.Struct.from_arg_list, 435 "STR_SPLIT": exp.Split.from_arg_list, 436 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 437 "TIME_BUCKET": exp.DateBin.from_arg_list, 438 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 439 "UNNEST": exp.Explode.from_arg_list, 440 "XOR": binary_from_function(exp.BitwiseXor), 441 } 442 443 FUNCTIONS.pop("DATE_SUB") 444 FUNCTIONS.pop("GLOB") 445 446 FUNCTION_PARSERS = { 447 **parser.Parser.FUNCTION_PARSERS, 448 **dict.fromkeys( 449 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 450 ), 451 } 452 FUNCTION_PARSERS.pop("DECODE") 453 454 NO_PAREN_FUNCTION_PARSERS = { 455 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 456 "MAP": lambda self: self._parse_map(), 457 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 458 } 459 460 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 461 TokenType.SEMI, 462 TokenType.ANTI, 463 } 464 465 PLACEHOLDER_PARSERS = { 466 **parser.Parser.PLACEHOLDER_PARSERS, 467 TokenType.PARAMETER: lambda self: ( 468 self.expression(exp.Placeholder, this=self._prev.text) 469 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 470 else None 471 ), 472 } 473 474 TYPE_CONVERTERS = { 475 # https://duckdb.org/docs/sql/data_types/numeric 476 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 477 # https://duckdb.org/docs/sql/data_types/text 478 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 479 } 480 481 STATEMENT_PARSERS = { 482 **parser.Parser.STATEMENT_PARSERS, 483 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 484 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 485 TokenType.SHOW: lambda self: self._parse_show(), 486 } 487 488 def _parse_expression(self) -> t.Optional[exp.Expression]: 489 # DuckDB supports prefix aliases, e.g. foo: 1 490 if self._next and self._next.token_type == TokenType.COLON: 491 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 492 self._match(TokenType.COLON) 493 comments = self._prev_comments or [] 494 495 this = self._parse_assignment() 496 if isinstance(this, exp.Expression): 497 # Moves the comment next to the alias in `alias: expr /* comment */` 498 comments += this.pop_comments() or [] 499 500 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 501 502 return super()._parse_expression() 503 504 def _parse_table( 505 self, 506 schema: bool = False, 507 joins: bool = False, 508 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 509 parse_bracket: bool = False, 510 is_db_reference: bool = False, 511 parse_partition: bool = False, 512 ) -> t.Optional[exp.Expression]: 513 # DuckDB supports prefix aliases, e.g. FROM foo: bar 514 if self._next and self._next.token_type == TokenType.COLON: 515 alias = self._parse_table_alias( 516 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 517 ) 518 self._match(TokenType.COLON) 519 comments = self._prev_comments or [] 520 else: 521 alias = None 522 comments = [] 523 524 table = super()._parse_table( 525 schema=schema, 526 joins=joins, 527 alias_tokens=alias_tokens, 528 parse_bracket=parse_bracket, 529 is_db_reference=is_db_reference, 530 parse_partition=parse_partition, 531 ) 532 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 533 # Moves the comment next to the alias in `alias: table /* comment */` 534 comments += table.pop_comments() or [] 535 alias.comments = alias.pop_comments() + comments 536 table.set("alias", alias) 537 538 return table 539 540 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 541 # https://duckdb.org/docs/sql/samples.html 542 sample = super()._parse_table_sample(as_modifier=as_modifier) 543 if sample and not sample.args.get("method"): 544 if sample.args.get("size"): 545 sample.set("method", exp.var("RESERVOIR")) 546 else: 547 sample.set("method", exp.var("SYSTEM")) 548 549 return sample 550 551 def _parse_bracket( 552 self, this: t.Optional[exp.Expression] = None 553 ) -> t.Optional[exp.Expression]: 554 bracket = super()._parse_bracket(this) 555 556 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 557 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 558 bracket.set("returns_list_for_maps", True) 559 560 return bracket 561 562 def _parse_map(self) -> exp.ToMap | exp.Map: 563 if self._match(TokenType.L_BRACE, advance=False): 564 return self.expression(exp.ToMap, this=self._parse_bracket()) 565 566 args = self._parse_wrapped_csv(self._parse_assignment) 567 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 568 569 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 570 return self._parse_field_def() 571 572 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 573 if len(aggregations) == 1: 574 return super()._pivot_column_names(aggregations) 575 return pivot_column_names(aggregations, dialect="duckdb") 576 577 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 578 def _parse_attach_option() -> exp.AttachOption: 579 return self.expression( 580 exp.AttachOption, 581 this=self._parse_var(any_token=True), 582 expression=self._parse_field(any_token=True), 583 ) 584 585 self._match(TokenType.DATABASE) 586 exists = self._parse_exists(not_=is_attach) 587 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 588 589 if self._match(TokenType.L_PAREN, advance=False): 590 expressions = self._parse_wrapped_csv(_parse_attach_option) 591 else: 592 expressions = None 593 594 return ( 595 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 596 if is_attach 597 else self.expression(exp.Detach, this=this, exists=exists) 598 ) 599 600 def _parse_show_duckdb(self, this: str) -> exp.Show: 601 return self.expression(exp.Show, this=this)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
603 class Generator(generator.Generator): 604 PARAMETER_TOKEN = "$" 605 NAMED_PLACEHOLDER_TOKEN = "$" 606 JOIN_HINTS = False 607 TABLE_HINTS = False 608 QUERY_HINTS = False 609 LIMIT_FETCH = "LIMIT" 610 STRUCT_DELIMITER = ("(", ")") 611 RENAME_TABLE_WITH_DB = False 612 NVL2_SUPPORTED = False 613 SEMI_ANTI_JOIN_WITH_SIDE = False 614 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 615 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 616 LAST_DAY_SUPPORTS_DATE_PART = False 617 JSON_KEY_VALUE_PAIR_SEP = "," 618 IGNORE_NULLS_IN_FUNC = True 619 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 620 SUPPORTS_CREATE_TABLE_LIKE = False 621 MULTI_ARG_DISTINCT = False 622 CAN_IMPLEMENT_ARRAY_ANY = True 623 SUPPORTS_TO_NUMBER = False 624 SUPPORTS_WINDOW_EXCLUDE = True 625 COPY_HAS_INTO_KEYWORD = False 626 STAR_EXCEPT = "EXCLUDE" 627 PAD_FILL_PATTERN_IS_REQUIRED = True 628 ARRAY_CONCAT_IS_VAR_LEN = False 629 ARRAY_SIZE_DIM_REQUIRED = False 630 NORMALIZE_EXTRACT_DATE_PARTS = True 631 632 TRANSFORMS = { 633 **generator.Generator.TRANSFORMS, 634 exp.ApproxDistinct: approx_count_distinct_sql, 635 exp.Array: inline_array_unless_query, 636 exp.ArrayFilter: rename_func("LIST_FILTER"), 637 exp.ArrayRemove: remove_from_array_using_filter, 638 exp.ArraySort: _array_sort_sql, 639 exp.ArraySum: rename_func("LIST_SUM"), 640 exp.BitwiseXor: rename_func("XOR"), 641 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 642 exp.CurrentDate: lambda *_: "CURRENT_DATE", 643 exp.CurrentTime: lambda *_: "CURRENT_TIME", 644 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 645 exp.DayOfMonth: rename_func("DAYOFMONTH"), 646 exp.DayOfWeek: rename_func("DAYOFWEEK"), 647 exp.DayOfWeekIso: rename_func("ISODOW"), 648 exp.DayOfYear: rename_func("DAYOFYEAR"), 649 exp.DataType: _datatype_sql, 650 exp.Date: _date_sql, 651 exp.DateAdd: _date_delta_sql, 652 exp.DateFromParts: rename_func("MAKE_DATE"), 653 exp.DateSub: _date_delta_sql, 654 exp.DateDiff: _date_diff_sql, 655 exp.DateStrToDate: datestrtodate_sql, 656 exp.Datetime: no_datetime_sql, 657 exp.DatetimeSub: _date_delta_sql, 658 exp.DatetimeAdd: _date_delta_sql, 659 exp.DateToDi: lambda self, 660 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 661 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 662 exp.DiToDate: lambda self, 663 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 664 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 665 exp.GenerateDateArray: _generate_datetime_array_sql, 666 exp.GenerateTimestampArray: _generate_datetime_array_sql, 667 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 668 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 669 exp.Explode: rename_func("UNNEST"), 670 exp.IntDiv: lambda self, e: self.binary(e, "//"), 671 exp.IsInf: rename_func("ISINF"), 672 exp.IsNan: rename_func("ISNAN"), 673 exp.JSONBExists: rename_func("JSON_EXISTS"), 674 exp.JSONExtract: _arrow_json_extract_sql, 675 exp.JSONExtractArray: _json_extract_value_array_sql, 676 exp.JSONExtractScalar: _arrow_json_extract_sql, 677 exp.JSONFormat: _json_format_sql, 678 exp.JSONValueArray: _json_extract_value_array_sql, 679 exp.Lateral: explode_to_unnest_sql, 680 exp.LogicalOr: rename_func("BOOL_OR"), 681 exp.LogicalAnd: rename_func("BOOL_AND"), 682 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 683 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 684 exp.MonthsBetween: lambda self, e: self.func( 685 "DATEDIFF", 686 "'month'", 687 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 688 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 689 ), 690 exp.PercentileCont: rename_func("QUANTILE_CONT"), 691 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 692 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 693 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 694 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 695 exp.RegexpReplace: lambda self, e: self.func( 696 "REGEXP_REPLACE", 697 e.this, 698 e.expression, 699 e.args.get("replacement"), 700 e.args.get("modifiers"), 701 ), 702 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 703 exp.RegexpILike: lambda self, e: self.func( 704 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 705 ), 706 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 707 exp.Return: lambda self, e: self.sql(e, "this"), 708 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 709 exp.Rand: rename_func("RANDOM"), 710 exp.SHA: rename_func("SHA1"), 711 exp.SHA2: sha256_sql, 712 exp.Split: rename_func("STR_SPLIT"), 713 exp.SortArray: _sort_array_sql, 714 exp.StrPosition: strposition_sql, 715 exp.StrToUnix: lambda self, e: self.func( 716 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 717 ), 718 exp.Struct: _struct_sql, 719 exp.Transform: rename_func("LIST_TRANSFORM"), 720 exp.TimeAdd: _date_delta_sql, 721 exp.Time: no_time_sql, 722 exp.TimeDiff: _timediff_sql, 723 exp.Timestamp: no_timestamp_sql, 724 exp.TimestampDiff: lambda self, e: self.func( 725 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 726 ), 727 exp.TimestampTrunc: timestamptrunc_sql(), 728 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 729 exp.TimeStrToTime: timestrtotime_sql, 730 exp.TimeStrToUnix: lambda self, e: self.func( 731 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 732 ), 733 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 734 exp.TimeToUnix: rename_func("EPOCH"), 735 exp.TsOrDiToDi: lambda self, 736 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 737 exp.TsOrDsAdd: _date_delta_sql, 738 exp.TsOrDsDiff: lambda self, e: self.func( 739 "DATE_DIFF", 740 f"'{e.args.get('unit') or 'DAY'}'", 741 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 742 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 743 ), 744 exp.UnixToStr: lambda self, e: self.func( 745 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 746 ), 747 exp.DatetimeTrunc: lambda self, e: self.func( 748 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 749 ), 750 exp.UnixToTime: _unix_to_time_sql, 751 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 752 exp.VariancePop: rename_func("VAR_POP"), 753 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 754 exp.Xor: bool_xor_sql, 755 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 756 rename_func("LEVENSHTEIN") 757 ), 758 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 759 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 760 exp.DateBin: rename_func("TIME_BUCKET"), 761 } 762 763 SUPPORTED_JSON_PATH_PARTS = { 764 exp.JSONPathKey, 765 exp.JSONPathRoot, 766 exp.JSONPathSubscript, 767 exp.JSONPathWildcard, 768 } 769 770 TYPE_MAPPING = { 771 **generator.Generator.TYPE_MAPPING, 772 exp.DataType.Type.BINARY: "BLOB", 773 exp.DataType.Type.BPCHAR: "TEXT", 774 exp.DataType.Type.CHAR: "TEXT", 775 exp.DataType.Type.DATETIME: "TIMESTAMP", 776 exp.DataType.Type.FLOAT: "REAL", 777 exp.DataType.Type.JSONB: "JSON", 778 exp.DataType.Type.NCHAR: "TEXT", 779 exp.DataType.Type.NVARCHAR: "TEXT", 780 exp.DataType.Type.UINT: "UINTEGER", 781 exp.DataType.Type.VARBINARY: "BLOB", 782 exp.DataType.Type.ROWVERSION: "BLOB", 783 exp.DataType.Type.VARCHAR: "TEXT", 784 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 785 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 786 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 787 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 788 } 789 790 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 791 RESERVED_KEYWORDS = { 792 "array", 793 "analyse", 794 "union", 795 "all", 796 "when", 797 "in_p", 798 "default", 799 "create_p", 800 "window", 801 "asymmetric", 802 "to", 803 "else", 804 "localtime", 805 "from", 806 "end_p", 807 "select", 808 "current_date", 809 "foreign", 810 "with", 811 "grant", 812 "session_user", 813 "or", 814 "except", 815 "references", 816 "fetch", 817 "limit", 818 "group_p", 819 "leading", 820 "into", 821 "collate", 822 "offset", 823 "do", 824 "then", 825 "localtimestamp", 826 "check_p", 827 "lateral_p", 828 "current_role", 829 "where", 830 "asc_p", 831 "placing", 832 "desc_p", 833 "user", 834 "unique", 835 "initially", 836 "column", 837 "both", 838 "some", 839 "as", 840 "any", 841 "only", 842 "deferrable", 843 "null_p", 844 "current_time", 845 "true_p", 846 "table", 847 "case", 848 "trailing", 849 "variadic", 850 "for", 851 "on", 852 "distinct", 853 "false_p", 854 "not", 855 "constraint", 856 "current_timestamp", 857 "returning", 858 "primary", 859 "intersect", 860 "having", 861 "analyze", 862 "current_user", 863 "and", 864 "cast", 865 "symmetric", 866 "using", 867 "order", 868 "current_catalog", 869 } 870 871 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 872 873 # DuckDB doesn't generally support CREATE TABLE .. properties 874 # https://duckdb.org/docs/sql/statements/create_table.html 875 PROPERTIES_LOCATION = { 876 prop: exp.Properties.Location.UNSUPPORTED 877 for prop in generator.Generator.PROPERTIES_LOCATION 878 } 879 880 # There are a few exceptions (e.g. temporary tables) which are supported or 881 # can be transpiled to DuckDB, so we explicitly override them accordingly 882 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 883 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 884 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 885 886 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 887 exp.FirstValue, 888 exp.Lag, 889 exp.LastValue, 890 exp.Lead, 891 exp.NthValue, 892 ) 893 894 def show_sql(self, expression: exp.Show) -> str: 895 return f"SHOW {expression.name}" 896 897 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 898 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 899 900 def strtotime_sql(self, expression: exp.StrToTime) -> str: 901 if expression.args.get("safe"): 902 formatted_time = self.format_time(expression) 903 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 904 return str_to_time_sql(self, expression) 905 906 def strtodate_sql(self, expression: exp.StrToDate) -> str: 907 if expression.args.get("safe"): 908 formatted_time = self.format_time(expression) 909 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 910 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 911 912 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 913 arg = expression.this 914 if expression.args.get("safe"): 915 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 916 return self.func("JSON", arg) 917 918 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 919 nano = expression.args.get("nano") 920 if nano is not None: 921 expression.set( 922 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 923 ) 924 925 return rename_func("MAKE_TIME")(self, expression) 926 927 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 928 sec = expression.args["sec"] 929 930 milli = expression.args.get("milli") 931 if milli is not None: 932 sec += milli.pop() / exp.Literal.number(1000.0) 933 934 nano = expression.args.get("nano") 935 if nano is not None: 936 sec += nano.pop() / exp.Literal.number(1000000000.0) 937 938 if milli or nano: 939 expression.set("sec", sec) 940 941 return rename_func("MAKE_TIMESTAMP")(self, expression) 942 943 def tablesample_sql( 944 self, 945 expression: exp.TableSample, 946 tablesample_keyword: t.Optional[str] = None, 947 ) -> str: 948 if not isinstance(expression.parent, exp.Select): 949 # This sample clause only applies to a single source, not the entire resulting relation 950 tablesample_keyword = "TABLESAMPLE" 951 952 if expression.args.get("size"): 953 method = expression.args.get("method") 954 if method and method.name.upper() != "RESERVOIR": 955 self.unsupported( 956 f"Sampling method {method} is not supported with a discrete sample count, " 957 "defaulting to reservoir sampling" 958 ) 959 expression.set("method", exp.var("RESERVOIR")) 960 961 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 962 963 def interval_sql(self, expression: exp.Interval) -> str: 964 multiplier: t.Optional[int] = None 965 unit = expression.text("unit").lower() 966 967 if unit.startswith("week"): 968 multiplier = 7 969 if unit.startswith("quarter"): 970 multiplier = 90 971 972 if multiplier: 973 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 974 975 return super().interval_sql(expression) 976 977 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 978 if isinstance(expression.parent, exp.UserDefinedFunction): 979 return self.sql(expression, "this") 980 return super().columndef_sql(expression, sep) 981 982 def join_sql(self, expression: exp.Join) -> str: 983 if ( 984 expression.side == "LEFT" 985 and not expression.args.get("on") 986 and isinstance(expression.this, exp.Unnest) 987 ): 988 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 989 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 990 return super().join_sql(expression.on(exp.true())) 991 992 return super().join_sql(expression) 993 994 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 995 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 996 if expression.args.get("is_end_exclusive"): 997 return rename_func("RANGE")(self, expression) 998 999 return self.function_fallback_sql(expression) 1000 1001 def countif_sql(self, expression: exp.CountIf) -> str: 1002 if self.dialect.version >= Version("1.2"): 1003 return self.function_fallback_sql(expression) 1004 1005 # https://github.com/tobymao/sqlglot/pull/4749 1006 return count_if_to_sum(self, expression) 1007 1008 def bracket_sql(self, expression: exp.Bracket) -> str: 1009 if self.dialect.version >= Version("1.2"): 1010 return super().bracket_sql(expression) 1011 1012 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1013 this = expression.this 1014 if isinstance(this, exp.Array): 1015 this.replace(exp.paren(this)) 1016 1017 bracket = super().bracket_sql(expression) 1018 1019 if not expression.args.get("returns_list_for_maps"): 1020 if not this.type: 1021 from sqlglot.optimizer.annotate_types import annotate_types 1022 1023 this = annotate_types(this, dialect=self.dialect) 1024 1025 if this.is_type(exp.DataType.Type.MAP): 1026 bracket = f"({bracket})[1]" 1027 1028 return bracket 1029 1030 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1031 expression_sql = self.sql(expression, "expression") 1032 1033 func = expression.this 1034 if isinstance(func, exp.PERCENTILES): 1035 # Make the order key the first arg and slide the fraction to the right 1036 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1037 order_col = expression.find(exp.Ordered) 1038 if order_col: 1039 func.set("expression", func.this) 1040 func.set("this", order_col.this) 1041 1042 this = self.sql(expression, "this").rstrip(")") 1043 1044 return f"{this}{expression_sql})" 1045 1046 def length_sql(self, expression: exp.Length) -> str: 1047 arg = expression.this 1048 1049 # Dialects like BQ and Snowflake also accept binary values as args, so 1050 # DDB will attempt to infer the type or resort to case/when resolution 1051 if not expression.args.get("binary") or arg.is_string: 1052 return self.func("LENGTH", arg) 1053 1054 if not arg.type: 1055 from sqlglot.optimizer.annotate_types import annotate_types 1056 1057 arg = annotate_types(arg, dialect=self.dialect) 1058 1059 if arg.is_type(*exp.DataType.TEXT_TYPES): 1060 return self.func("LENGTH", arg) 1061 1062 # We need these casts to make duckdb's static type checker happy 1063 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1064 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1065 1066 case = ( 1067 exp.case(self.func("TYPEOF", arg)) 1068 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1069 .else_( 1070 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1071 ) # anonymous to break length_sql recursion 1072 ) 1073 1074 return self.sql(case) 1075 1076 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1077 this = expression.this 1078 key = expression.args.get("key") 1079 key_sql = key.name if isinstance(key, exp.Expression) else "" 1080 value_sql = self.sql(expression, "value") 1081 1082 kv_sql = f"{key_sql} := {value_sql}" 1083 1084 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1085 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1086 if isinstance(this, exp.Struct) and not this.expressions: 1087 return self.func("STRUCT_PACK", kv_sql) 1088 1089 return self.func("STRUCT_INSERT", this, kv_sql) 1090 1091 def unnest_sql(self, expression: exp.Unnest) -> str: 1092 explode_array = expression.args.get("explode_array") 1093 if explode_array: 1094 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1095 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1096 expression.expressions.append( 1097 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1098 ) 1099 1100 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1101 alias = expression.args.get("alias") 1102 if alias: 1103 expression.set("alias", None) 1104 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1105 1106 unnest_sql = super().unnest_sql(expression) 1107 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1108 return self.sql(select) 1109 1110 return super().unnest_sql(expression) 1111 1112 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1113 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1114 # DuckDB should render IGNORE NULLS only for the general-purpose 1115 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1116 return super().ignorenulls_sql(expression) 1117 1118 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1119 return self.sql(expression, "this") 1120 1121 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1122 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1123 # DuckDB should render RESPECT NULLS only for the general-purpose 1124 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1125 return super().respectnulls_sql(expression) 1126 1127 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1128 return self.sql(expression, "this") 1129 1130 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1131 this = self.sql(expression, "this") 1132 null_text = self.sql(expression, "null") 1133 1134 if null_text: 1135 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1136 1137 return self.func("ARRAY_TO_STRING", this, expression.expression) 1138 1139 @unsupported_args("position", "occurrence") 1140 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1141 group = expression.args.get("group") 1142 params = expression.args.get("parameters") 1143 1144 # Do not render group if there is no following argument, 1145 # and it's the default value for this dialect 1146 if ( 1147 not params 1148 and group 1149 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1150 ): 1151 group = None 1152 return self.func( 1153 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1154 ) 1155 1156 @unsupported_args("culture") 1157 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1158 fmt = expression.args.get("format") 1159 if fmt and fmt.is_int: 1160 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1161 1162 self.unsupported("Only integer formats are supported by NumberToStr") 1163 return self.function_fallback_sql(expression) 1164 1165 def autoincrementcolumnconstraint_sql(self, _) -> str: 1166 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1167 return ""
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
906 def strtodate_sql(self, expression: exp.StrToDate) -> str: 907 if expression.args.get("safe"): 908 formatted_time = self.format_time(expression) 909 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 910 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
918 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 919 nano = expression.args.get("nano") 920 if nano is not None: 921 expression.set( 922 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 923 ) 924 925 return rename_func("MAKE_TIME")(self, expression)
927 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 928 sec = expression.args["sec"] 929 930 milli = expression.args.get("milli") 931 if milli is not None: 932 sec += milli.pop() / exp.Literal.number(1000.0) 933 934 nano = expression.args.get("nano") 935 if nano is not None: 936 sec += nano.pop() / exp.Literal.number(1000000000.0) 937 938 if milli or nano: 939 expression.set("sec", sec) 940 941 return rename_func("MAKE_TIMESTAMP")(self, expression)
943 def tablesample_sql( 944 self, 945 expression: exp.TableSample, 946 tablesample_keyword: t.Optional[str] = None, 947 ) -> str: 948 if not isinstance(expression.parent, exp.Select): 949 # This sample clause only applies to a single source, not the entire resulting relation 950 tablesample_keyword = "TABLESAMPLE" 951 952 if expression.args.get("size"): 953 method = expression.args.get("method") 954 if method and method.name.upper() != "RESERVOIR": 955 self.unsupported( 956 f"Sampling method {method} is not supported with a discrete sample count, " 957 "defaulting to reservoir sampling" 958 ) 959 expression.set("method", exp.var("RESERVOIR")) 960 961 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
963 def interval_sql(self, expression: exp.Interval) -> str: 964 multiplier: t.Optional[int] = None 965 unit = expression.text("unit").lower() 966 967 if unit.startswith("week"): 968 multiplier = 7 969 if unit.startswith("quarter"): 970 multiplier = 90 971 972 if multiplier: 973 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 974 975 return super().interval_sql(expression)
982 def join_sql(self, expression: exp.Join) -> str: 983 if ( 984 expression.side == "LEFT" 985 and not expression.args.get("on") 986 and isinstance(expression.this, exp.Unnest) 987 ): 988 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 989 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 990 return super().join_sql(expression.on(exp.true())) 991 992 return super().join_sql(expression)
1008 def bracket_sql(self, expression: exp.Bracket) -> str: 1009 if self.dialect.version >= Version("1.2"): 1010 return super().bracket_sql(expression) 1011 1012 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1013 this = expression.this 1014 if isinstance(this, exp.Array): 1015 this.replace(exp.paren(this)) 1016 1017 bracket = super().bracket_sql(expression) 1018 1019 if not expression.args.get("returns_list_for_maps"): 1020 if not this.type: 1021 from sqlglot.optimizer.annotate_types import annotate_types 1022 1023 this = annotate_types(this, dialect=self.dialect) 1024 1025 if this.is_type(exp.DataType.Type.MAP): 1026 bracket = f"({bracket})[1]" 1027 1028 return bracket
1030 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1031 expression_sql = self.sql(expression, "expression") 1032 1033 func = expression.this 1034 if isinstance(func, exp.PERCENTILES): 1035 # Make the order key the first arg and slide the fraction to the right 1036 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1037 order_col = expression.find(exp.Ordered) 1038 if order_col: 1039 func.set("expression", func.this) 1040 func.set("this", order_col.this) 1041 1042 this = self.sql(expression, "this").rstrip(")") 1043 1044 return f"{this}{expression_sql})"
1046 def length_sql(self, expression: exp.Length) -> str: 1047 arg = expression.this 1048 1049 # Dialects like BQ and Snowflake also accept binary values as args, so 1050 # DDB will attempt to infer the type or resort to case/when resolution 1051 if not expression.args.get("binary") or arg.is_string: 1052 return self.func("LENGTH", arg) 1053 1054 if not arg.type: 1055 from sqlglot.optimizer.annotate_types import annotate_types 1056 1057 arg = annotate_types(arg, dialect=self.dialect) 1058 1059 if arg.is_type(*exp.DataType.TEXT_TYPES): 1060 return self.func("LENGTH", arg) 1061 1062 # We need these casts to make duckdb's static type checker happy 1063 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1064 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1065 1066 case = ( 1067 exp.case(self.func("TYPEOF", arg)) 1068 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1069 .else_( 1070 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1071 ) # anonymous to break length_sql recursion 1072 ) 1073 1074 return self.sql(case)
1076 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1077 this = expression.this 1078 key = expression.args.get("key") 1079 key_sql = key.name if isinstance(key, exp.Expression) else "" 1080 value_sql = self.sql(expression, "value") 1081 1082 kv_sql = f"{key_sql} := {value_sql}" 1083 1084 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1085 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1086 if isinstance(this, exp.Struct) and not this.expressions: 1087 return self.func("STRUCT_PACK", kv_sql) 1088 1089 return self.func("STRUCT_INSERT", this, kv_sql)
1091 def unnest_sql(self, expression: exp.Unnest) -> str: 1092 explode_array = expression.args.get("explode_array") 1093 if explode_array: 1094 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1095 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1096 expression.expressions.append( 1097 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1098 ) 1099 1100 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1101 alias = expression.args.get("alias") 1102 if alias: 1103 expression.set("alias", None) 1104 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1105 1106 unnest_sql = super().unnest_sql(expression) 1107 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1108 return self.sql(select) 1109 1110 return super().unnest_sql(expression)
1112 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1113 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1114 # DuckDB should render IGNORE NULLS only for the general-purpose 1115 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1116 return super().ignorenulls_sql(expression) 1117 1118 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1119 return self.sql(expression, "this")
1121 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1122 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1123 # DuckDB should render RESPECT NULLS only for the general-purpose 1124 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1125 return super().respectnulls_sql(expression) 1126 1127 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1128 return self.sql(expression, "this")
1130 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1131 this = self.sql(expression, "this") 1132 null_text = self.sql(expression, "null") 1133 1134 if null_text: 1135 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1136 1137 return self.func("ARRAY_TO_STRING", this, expression.expression)
1139 @unsupported_args("position", "occurrence") 1140 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1141 group = expression.args.get("group") 1142 params = expression.args.get("parameters") 1143 1144 # Do not render group if there is no following argument, 1145 # and it's the default value for this dialect 1146 if ( 1147 not params 1148 and group 1149 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1150 ): 1151 group = None 1152 return self.func( 1153 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1154 )
1156 @unsupported_args("culture") 1157 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1158 fmt = expression.args.get("format") 1159 if fmt and fmt.is_int: 1160 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1161 1162 self.unsupported("Only integer formats are supported by NumberToStr") 1163 return self.function_fallback_sql(expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql