sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the two dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
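
# Illustrative sketch (not part of the original module): these helpers surface when
# transpiling BigQuery to DuckDB, e.g. (exact output can vary across sqlglot versions):
#
#   import sqlglot
#   sqlglot.transpile("SELECT DATE_ADD(d, INTERVAL 1 DAY)", read="bigquery", write="duckdb")
#   # -> roughly: SELECT d + INTERVAL '1' DAY
#   sqlglot.transpile("SELECT TIME_DIFF(t1, t2, MINUTE)", read="bigquery", write="duckdb")
#   # -> roughly: SELECT DATE_DIFF('MINUTE', CAST(t2 AS TIME), CAST(t1 AS TIME))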


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder
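
# Illustrative sketch (not part of the original module): DuckDB's GENERATE_SERIES is
# end-inclusive while RANGE is end-exclusive, so both parse into exp.GenerateSeries and
# the is_end_exclusive flag lets the generator round-trip them faithfully:
#
#   import sqlglot
#   sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")
#   # RANGE stays RANGE (end-exclusive); GENERATE_SERIES stays GENERATE_SERIES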


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"
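
# Illustrative sketch (not part of the original module): a keyed struct uses DuckDB's
# brace syntax, while BigQuery's inline STRUCT-with-cast falls back to ROW:
#
#   import sqlglot
#   sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="duckdb")
#   # -> roughly: SELECT {'a': 1}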


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql
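
# Illustrative sketch (not part of the original module): the epoch scale picks the
# matching DuckDB builtin (seconds -> TO_TIMESTAMP, millis -> EPOCH_MS, micros ->
# MAKE_TIMESTAMP), e.g.:
#
#   import sqlglot
#   sqlglot.transpile("SELECT TIMESTAMP_MILLIS(1700000000000)", read="bigquery", write="duckdb")
#   # -> roughly: SELECT EPOCH_MS(1700000000000)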


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
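
# Illustrative sketch (not part of the original module): BigQuery's GENERATE_DATE_ARRAY
# becomes a GENERATE_SERIES call cast back to DATE[]:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-04')",
#       read="bigquery",
#       write="duckdb",
#   )
#   # -> roughly: SELECT CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE),
#   #    CAST('2020-01-04' AS DATE), INTERVAL '1' DAY) AS DATE[])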


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
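
        # Illustrative sketch (not part of the original module): the KEYWORDS remappings
        # above normalize DuckDB's type aliases at tokenization time, e.g.:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT CAST(x AS STRING)", read="duckdb", write="duckdb")
        #   # -> SELECT CAST(x AS TEXT)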

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()
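
        # Illustrative sketch (not part of the original module): with the prefix-alias
        # support above, "SELECT foo: 1" parses like "SELECT 1 AS foo":
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT foo: 1", read="duckdb").sql(dialect="duckdb")
        #   # -> roughly: SELECT 1 AS foo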

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)
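
    # Illustrative sketch (not part of the original module): _parse_table_sample above
    # makes DuckDB's implicit sampling defaults explicit on the AST:
    #
    #   import sqlglot
    #   query = sqlglot.parse_one("SELECT * FROM t USING SAMPLE 10", read="duckdb")
    #   sample = query.find(exp.TableSample)
    #   # sample.args["method"] now holds RESERVOIR (discrete row counts default to
    #   # reservoir sampling; percentage samples default to SYSTEM)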

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }
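
        # Illustrative sketch (not part of the original module): most transforms above
        # are plain renames; a few restructure arguments, e.g. a case-insensitive regex
        # match is expressed through REGEXP_MATCHES' flags argument:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT a ~* 'x'", read="postgres", write="duckdb")
        #   # -> roughly: SELECT REGEXP_MATCHES(a, 'x', 'i')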

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)
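
        # Illustrative sketch (not part of the original module): interval_sql above
        # rewrites WEEK/QUARTER intervals into day arithmetic, e.g.:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT INTERVAL 2 WEEK", read="bigquery", write="duckdb")
        #   # -> roughly: SELECT (7 * INTERVAL '2' DAY)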

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket
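
        # Illustrative sketch (not part of the original module): countif_sql and
        # bracket_sql above are gated on the configured target version. Recent DuckDB
        # targets keep the builtin, while targets older than 1.2 get a
        # SUM(CASE WHEN ...)-style rewrite via count_if_to_sum (how the version is
        # supplied depends on the sqlglot release):
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT COUNT_IF(x > 1) FROM t", read="duckdb", write="duckdb")
        #   # -> SELECT COUNT_IF(x > 1) FROM t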

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)
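
        # Illustrative sketch (not part of the original module): withingroup_sql above
        # folds the WITHIN GROUP order key into the quantile call, since DuckDB takes
        # the ordering expression as a regular argument:
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t",
        #       read="postgres",
        #       write="duckdb",
        #   )
        #   # -> roughly: SELECT QUANTILE_CONT(x, 0.5) FROM t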

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
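
# Illustrative sketch (not part of the original module): objectinsert_sql above lets
# Snowflake's OBJECT_INSERT land on DuckDB's struct builders:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)", read="snowflake", write="duckdb"
#   )
#   # -> roughly: SELECT STRUCT_PACK(k := 1)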
return super().interval_sql(expression) 967 968 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 969 if isinstance(expression.parent, exp.UserDefinedFunction): 970 return self.sql(expression, "this") 971 return super().columndef_sql(expression, sep) 972 973 def join_sql(self, expression: exp.Join) -> str: 974 if ( 975 expression.side == "LEFT" 976 and not expression.args.get("on") 977 and isinstance(expression.this, exp.Unnest) 978 ): 979 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 980 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 981 return super().join_sql(expression.on(exp.true())) 982 983 return super().join_sql(expression) 984 985 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 986 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 987 if expression.args.get("is_end_exclusive"): 988 return rename_func("RANGE")(self, expression) 989 990 return self.function_fallback_sql(expression) 991 992 def countif_sql(self, expression: exp.CountIf) -> str: 993 if self.dialect.version >= Version("1.2"): 994 return self.function_fallback_sql(expression) 995 996 # https://github.com/tobymao/sqlglot/pull/4749 997 return count_if_to_sum(self, expression) 998 999 def bracket_sql(self, expression: exp.Bracket) -> str: 1000 if self.dialect.version >= Version("1.2"): 1001 return super().bracket_sql(expression) 1002 1003 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1004 this = expression.this 1005 if isinstance(this, exp.Array): 1006 this.replace(exp.paren(this)) 1007 1008 bracket = super().bracket_sql(expression) 1009 1010 if not expression.args.get("returns_list_for_maps"): 1011 if not this.type: 1012 from sqlglot.optimizer.annotate_types import annotate_types 1013 1014 this = annotate_types(this, dialect=self.dialect) 1015 1016 if this.is_type(exp.DataType.Type.MAP): 1017 bracket = f"({bracket})[1]" 1018 1019 return bracket 1020 1021 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1022 expression_sql = self.sql(expression, "expression") 1023 1024 func = expression.this 1025 if isinstance(func, exp.PERCENTILES): 1026 # Make the order key the first arg and slide the fraction to the right 1027 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1028 order_col = expression.find(exp.Ordered) 1029 if order_col: 1030 func.set("expression", func.this) 1031 func.set("this", order_col.this) 1032 1033 this = self.sql(expression, "this").rstrip(")") 1034 1035 return f"{this}{expression_sql})" 1036 1037 def length_sql(self, expression: exp.Length) -> str: 1038 arg = expression.this 1039 1040 # Dialects like BQ and Snowflake also accept binary values as args, so 1041 # DDB will attempt to infer the type or resort to case/when resolution 1042 if not expression.args.get("binary") or arg.is_string: 1043 return self.func("LENGTH", arg) 1044 1045 if not arg.type: 1046 from sqlglot.optimizer.annotate_types import annotate_types 1047 1048 arg = annotate_types(arg, dialect=self.dialect) 1049 1050 if arg.is_type(*exp.DataType.TEXT_TYPES): 1051 return self.func("LENGTH", arg) 1052 1053 # We need these casts to make duckdb's static type checker happy 1054 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1055 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1056 1057 case = ( 1058 exp.case(self.func("TYPEOF", arg)) 1059 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1060 .else_( 1061 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1062 ) # 
anonymous to break length_sql recursion 1063 ) 1064 1065 return self.sql(case) 1066 1067 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1068 this = expression.this 1069 key = expression.args.get("key") 1070 key_sql = key.name if isinstance(key, exp.Expression) else "" 1071 value_sql = self.sql(expression, "value") 1072 1073 kv_sql = f"{key_sql} := {value_sql}" 1074 1075 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1076 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1077 if isinstance(this, exp.Struct) and not this.expressions: 1078 return self.func("STRUCT_PACK", kv_sql) 1079 1080 return self.func("STRUCT_INSERT", this, kv_sql) 1081 1082 def unnest_sql(self, expression: exp.Unnest) -> str: 1083 explode_array = expression.args.get("explode_array") 1084 if explode_array: 1085 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1086 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1087 expression.expressions.append( 1088 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1089 ) 1090 1091 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1092 alias = expression.args.get("alias") 1093 if alias: 1094 expression.set("alias", None) 1095 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1096 1097 unnest_sql = super().unnest_sql(expression) 1098 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1099 return self.sql(select) 1100 1101 return super().unnest_sql(expression) 1102 1103 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1104 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1105 # DuckDB should render IGNORE NULLS only for the general-purpose 1106 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1107 return super().ignorenulls_sql(expression) 1108 1109 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1110 return self.sql(expression, "this") 1111 1112 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1113 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1114 # DuckDB should render RESPECT NULLS only for the general-purpose 1115 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1116 return super().respectnulls_sql(expression) 1117 1118 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1119 return self.sql(expression, "this") 1120 1121 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1122 this = self.sql(expression, "this") 1123 null_text = self.sql(expression, "null") 1124 1125 if null_text: 1126 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1127 1128 return self.func("ARRAY_TO_STRING", this, expression.expression) 1129 1130 @unsupported_args("position", "occurrence") 1131 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1132 group = expression.args.get("group") 1133 params = expression.args.get("parameters") 1134 1135 # Do not render group if there is no following argument, 1136 # and it's the default value for this dialect 1137 if ( 1138 not params 1139 and group 1140 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1141 ): 1142 group = None 1143 return self.func( 1144 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1145 ) 1146 1147 @unsupported_args("culture") 1148 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1149 fmt = expression.args.get("format") 1150 if fmt and fmt.is_int: 1151 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1152 1153 self.unsupported("Only integer formats are supported by NumberToStr") 1154 return self.function_fallback_sql(expression) 1155 1156 def autoincrementcolumnconstraint_sql(self, _) -> str: 1157 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1158 return ""
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
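As a quick illustration of how a few of these settings surface in practice, here is a minimal sketch using sqlglot's public helpers (the table and column names are made up):

import sqlglot

# ORDER BY ALL is understood by the DuckDB dialect and survives a round-trip
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])

# x::INT[5] is parsed as a fixed-size array cast rather than a subscript
print(sqlglot.parse_one("SELECT x::INT[5] FROM t", read="duckdb").sql("duckdb"))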
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    if isinstance(path, exp.Literal):
        # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
        # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
        # This check ensures we'll avoid trying to parse these as JSON paths, which can
        # either result in a noisy warning or in an invalid representation of the path.
        path_text = path.name
        if path_text.startswith("/") or "[#" in path_text:
            return path

    return super().to_json_path(path)
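A hedged sketch of the behavior described above: pointer-style and back-of-list paths are left as plain string literals instead of being parsed as JSON paths (names are illustrative):

import sqlglot

# '/a/0' is DuckDB's JSON pointer syntax, so it is not converted to a JSONPath
print(sqlglot.transpile("SELECT col -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])

# '$.list[#-1]' accesses the back of a list and is likewise passed through
print(sqlglot.transpile("SELECT col -> '$.list[#-1]' FROM t", read="duckdb", write="duckdb")[0])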
305 class Tokenizer(tokens.Tokenizer): 306 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 307 HEREDOC_STRINGS = ["$"] 308 309 HEREDOC_TAG_IS_IDENTIFIER = True 310 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 311 312 KEYWORDS = { 313 **tokens.Tokenizer.KEYWORDS, 314 "//": TokenType.DIV, 315 "**": TokenType.DSTAR, 316 "^@": TokenType.CARET_AT, 317 "@>": TokenType.AT_GT, 318 "<@": TokenType.LT_AT, 319 "ATTACH": TokenType.ATTACH, 320 "BINARY": TokenType.VARBINARY, 321 "BITSTRING": TokenType.BIT, 322 "BPCHAR": TokenType.TEXT, 323 "CHAR": TokenType.TEXT, 324 "DATETIME": TokenType.TIMESTAMPNTZ, 325 "DETACH": TokenType.DETACH, 326 "EXCLUDE": TokenType.EXCEPT, 327 "LOGICAL": TokenType.BOOLEAN, 328 "ONLY": TokenType.ONLY, 329 "PIVOT_WIDER": TokenType.PIVOT, 330 "POSITIONAL": TokenType.POSITIONAL, 331 "SIGNED": TokenType.INT, 332 "STRING": TokenType.TEXT, 333 "SUMMARIZE": TokenType.SUMMARIZE, 334 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 335 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 336 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 337 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 338 "TIMESTAMP_US": TokenType.TIMESTAMP, 339 "UBIGINT": TokenType.UBIGINT, 340 "UINTEGER": TokenType.UINT, 341 "USMALLINT": TokenType.USMALLINT, 342 "UTINYINT": TokenType.UTINYINT, 343 "VARCHAR": TokenType.TEXT, 344 } 345 KEYWORDS.pop("/*+") 346 347 SINGLE_TOKENS = { 348 **tokens.Tokenizer.SINGLE_TOKENS, 349 "$": TokenType.PARAMETER, 350 } 351 352 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
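A small sketch of these keyword mappings, assuming sqlglot's top-level tokenize helper (token output shown for inspection only):

import sqlglot

# '//' lexes to DIV (integer division) and '**' to DSTAR (exponentiation)
for token in sqlglot.tokenize("SELECT 7 // 2, 2 ** 10", read="duckdb"):
    print(token.token_type, token.text)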
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
354 class Parser(parser.Parser): 355 BITWISE = { 356 **parser.Parser.BITWISE, 357 TokenType.TILDA: exp.RegexpLike, 358 } 359 BITWISE.pop(TokenType.CARET) 360 361 RANGE_PARSERS = { 362 **parser.Parser.RANGE_PARSERS, 363 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 364 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 365 } 366 367 EXPONENT = { 368 **parser.Parser.EXPONENT, 369 TokenType.CARET: exp.Pow, 370 TokenType.DSTAR: exp.Pow, 371 } 372 373 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 374 375 SHOW_PARSERS = { 376 "TABLES": _show_parser("TABLES"), 377 "ALL TABLES": _show_parser("ALL TABLES"), 378 } 379 380 FUNCTIONS = { 381 **parser.Parser.FUNCTIONS, 382 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 383 "ARRAY_SORT": exp.SortArray.from_arg_list, 384 "DATEDIFF": _build_date_diff, 385 "DATE_DIFF": _build_date_diff, 386 "DATE_TRUNC": date_trunc_to_time, 387 "DATETRUNC": date_trunc_to_time, 388 "DECODE": lambda args: exp.Decode( 389 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 390 ), 391 "EDITDIST3": exp.Levenshtein.from_arg_list, 392 "ENCODE": lambda args: exp.Encode( 393 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 394 ), 395 "EPOCH": exp.TimeToUnix.from_arg_list, 396 "EPOCH_MS": lambda args: exp.UnixToTime( 397 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 398 ), 399 "GENERATE_SERIES": _build_generate_series(), 400 "JSON": exp.ParseJSON.from_arg_list, 401 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 402 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 403 "LIST_HAS": exp.ArrayContains.from_arg_list, 404 "LIST_REVERSE_SORT": _build_sort_array_desc, 405 "LIST_SORT": exp.SortArray.from_arg_list, 406 "LIST_VALUE": lambda args: exp.Array(expressions=args), 407 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 408 "MAKE_TIMESTAMP": _build_make_timestamp, 409 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 410 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 411 "RANGE": _build_generate_series(end_exclusive=True), 412 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 413 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 414 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 415 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 416 this=seq_get(args, 0), 417 expression=seq_get(args, 1), 418 replacement=seq_get(args, 2), 419 modifiers=seq_get(args, 3), 420 ), 421 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 422 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 423 "STRING_SPLIT": exp.Split.from_arg_list, 424 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 425 "STRING_TO_ARRAY": exp.Split.from_arg_list, 426 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 427 "STRUCT_PACK": exp.Struct.from_arg_list, 428 "STR_SPLIT": exp.Split.from_arg_list, 429 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 430 "TIME_BUCKET": exp.DateBin.from_arg_list, 431 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 432 "UNNEST": exp.Explode.from_arg_list, 433 "XOR": binary_from_function(exp.BitwiseXor), 434 } 435 436 FUNCTIONS.pop("DATE_SUB") 437 FUNCTIONS.pop("GLOB") 438 439 FUNCTION_PARSERS = { 440 **parser.Parser.FUNCTION_PARSERS, 441 **dict.fromkeys( 442 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 443 ), 444 } 445 FUNCTION_PARSERS.pop("DECODE") 446 447 NO_PAREN_FUNCTION_PARSERS = { 448 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 449 "MAP": 
lambda self: self._parse_map(), 450 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 451 } 452 453 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 454 TokenType.SEMI, 455 TokenType.ANTI, 456 } 457 458 PLACEHOLDER_PARSERS = { 459 **parser.Parser.PLACEHOLDER_PARSERS, 460 TokenType.PARAMETER: lambda self: ( 461 self.expression(exp.Placeholder, this=self._prev.text) 462 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 463 else None 464 ), 465 } 466 467 TYPE_CONVERTERS = { 468 # https://duckdb.org/docs/sql/data_types/numeric 469 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 470 # https://duckdb.org/docs/sql/data_types/text 471 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 472 } 473 474 STATEMENT_PARSERS = { 475 **parser.Parser.STATEMENT_PARSERS, 476 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 477 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 478 TokenType.SHOW: lambda self: self._parse_show(), 479 } 480 481 def _parse_expression(self) -> t.Optional[exp.Expression]: 482 # DuckDB supports prefix aliases, e.g. foo: 1 483 if self._next and self._next.token_type == TokenType.COLON: 484 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 485 self._match(TokenType.COLON) 486 comments = self._prev_comments or [] 487 488 this = self._parse_assignment() 489 if isinstance(this, exp.Expression): 490 # Moves the comment next to the alias in `alias: expr /* comment */` 491 comments += this.pop_comments() or [] 492 493 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 494 495 return super()._parse_expression() 496 497 def _parse_table( 498 self, 499 schema: bool = False, 500 joins: bool = False, 501 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 502 parse_bracket: bool = False, 503 is_db_reference: bool = False, 504 parse_partition: bool = False, 505 ) -> t.Optional[exp.Expression]: 506 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 507 if self._next and self._next.token_type == TokenType.COLON: 508 alias = self._parse_table_alias( 509 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 510 ) 511 self._match(TokenType.COLON) 512 comments = self._prev_comments or [] 513 else: 514 alias = None 515 comments = [] 516 517 table = super()._parse_table( 518 schema=schema, 519 joins=joins, 520 alias_tokens=alias_tokens, 521 parse_bracket=parse_bracket, 522 is_db_reference=is_db_reference, 523 parse_partition=parse_partition, 524 ) 525 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 526 # Moves the comment next to the alias in `alias: table /* comment */` 527 comments += table.pop_comments() or [] 528 alias.comments = alias.pop_comments() + comments 529 table.set("alias", alias) 530 531 return table 532 533 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 534 # https://duckdb.org/docs/sql/samples.html 535 sample = super()._parse_table_sample(as_modifier=as_modifier) 536 if sample and not sample.args.get("method"): 537 if sample.args.get("size"): 538 sample.set("method", exp.var("RESERVOIR")) 539 else: 540 sample.set("method", exp.var("SYSTEM")) 541 542 return sample 543 544 def _parse_bracket( 545 self, this: t.Optional[exp.Expression] = None 546 ) -> t.Optional[exp.Expression]: 547 bracket = super()._parse_bracket(this) 548 549 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 550 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 551 bracket.set("returns_list_for_maps", True) 552 553 return bracket 554 555 def _parse_map(self) -> exp.ToMap | exp.Map: 556 if self._match(TokenType.L_BRACE, advance=False): 557 return self.expression(exp.ToMap, this=self._parse_bracket()) 558 559 args = self._parse_wrapped_csv(self._parse_assignment) 560 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 561 562 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 563 return self._parse_field_def() 564 565 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 566 if len(aggregations) == 1: 567 return super()._pivot_column_names(aggregations) 568 return pivot_column_names(aggregations, dialect="duckdb") 569 570 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 571 def _parse_attach_option() -> exp.AttachOption: 572 return self.expression( 573 exp.AttachOption, 574 this=self._parse_var(any_token=True), 575 expression=self._parse_field(any_token=True), 576 ) 577 578 self._match(TokenType.DATABASE) 579 exists = self._parse_exists(not_=is_attach) 580 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 581 582 if self._match(TokenType.L_PAREN, advance=False): 583 expressions = self._parse_wrapped_csv(_parse_attach_option) 584 else: 585 expressions = None 586 587 return ( 588 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 589 if is_attach 590 else self.expression(exp.Detach, this=this, exists=exists) 591 ) 592 593 def _parse_show_duckdb(self, this: str) -> exp.Show: 594 return self.expression(exp.Show, this=this)
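As a hedged illustration of the prefix-alias support parsed above (foo: expr in projections, alias: table in FROM clauses), both forms normalize to ordinary AS aliases on the way back out (names are made up):

import sqlglot

# Prefix aliases are parsed into regular exp.Alias / exp.TableAlias nodes
print(sqlglot.transpile("SELECT total: 1 + 2 FROM p: prices", read="duckdb", write="duckdb")[0])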
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
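These options are usually reached through the top-level parse helpers rather than by constructing a Parser directly; a minimal sketch (the query is arbitrary):

import sqlglot
from sqlglot.errors import ErrorLevel

# Aggregate up to five error messages into a single raised ParseError
expression = sqlglot.parse_one(
    "SELECT 1", read="duckdb", error_level=ErrorLevel.RAISE, max_errors=5
)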
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
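Likewise, these generator options are typically passed through the top-level helpers; a minimal sketch (the query is arbitrary):

import sqlglot

# Pretty-print the output and quote every identifier
print(
    sqlglot.transpile(
        "SELECT a FROM t WHERE x IN (SELECT y FROM u)",
        read="duckdb",
        write="duckdb",
        pretty=True,
        identify=True,
    )[0]
)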
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
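For example, building an exp.StrToDate with the safe flag set exercises the TRY_STRPTIME branch above (the literal values are arbitrary):

from sqlglot import exp

# safe=True renders TRY_STRPTIME wrapped in a DATE cast
node = exp.StrToDate(
    this=exp.Literal.string("2024-01-31"),
    format=exp.Literal.string("%Y-%m-%d"),
    safe=exp.true(),
)
print(node.sql("duckdb"))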
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    nano = expression.args.get("nano")
    if nano is not None:
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    sec = expression.args["sec"]

    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
def interval_sql(self, expression: exp.Interval) -> str:
    multiplier: t.Optional[int] = None
    unit = expression.text("unit").lower()

    if unit.startswith("week"):
        multiplier = 7
    if unit.startswith("quarter"):
        multiplier = 90

    if multiplier:
        return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

    return super().interval_sql(expression)
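A hedged round-trip sketch of the unit rewrite above; WEEK and QUARTER intervals come out as day multiples:

import sqlglot

# Expected shape: 90 * INTERVAL ... DAY (and 7 * ... for WEEK)
print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL '1' QUARTER", read="duckdb", write="duckdb")[0])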
def join_sql(self, expression: exp.Join) -> str:
    if (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    ):
        # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        return super().join_sql(expression.on(exp.true()))

    return super().join_sql(expression)
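A hedged sketch of the fix-up above, reading the ON-less form from BigQuery (table and column names are made up):

import sqlglot

# DuckDB requires an ON clause here, so ON TRUE is appended during generation
print(
    sqlglot.transpile(
        "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x",
        read="bigquery",
        write="duckdb",
    )[0]
)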
def bracket_sql(self, expression: exp.Bracket) -> str:
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            bracket = f"({bracket})[1]"

    return bracket
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"
def length_sql(self, expression: exp.Length) -> str:
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(
            exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
    )

    return self.sql(case)
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
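The empty-struct case mentioned in the comment, round-tripped from Snowflake (output text is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')", read="snowflake", write="duckdb")[0])
# e.g. SELECT STRUCT_PACK(k := 'v')
print(sqlglot.transpile("SELECT OBJECT_INSERT(OBJECT_CONSTRUCT('a', 1), 'k', 'v')", read="snowflake", write="duckdb")[0])
# e.g. SELECT STRUCT_INSERT({'a': 1}, k := 'v')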
def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
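A BigQuery-to-DuckDB sketch (the alias placement and output text are illustrative):

import sqlglot

sql = "SELECT v FROM UNNEST([1, 2, 3]) AS v"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# e.g. SELECT v FROM (SELECT UNNEST([1, 2, 3], max_depth => 2)) AS v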
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
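Both guards behave the same way; a hedged sketch for the IGNORE NULLS case (outputs are illustrative):

import sqlglot

# Kept for a window function that accepts the modifier:
sql = "SELECT LAST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# e.g. SELECT LAST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t

# Dropped (with a warning) anywhere else, such as inside a plain aggregate:
print(sqlglot.transpile("SELECT ARRAY_AGG(x IGNORE NULLS) FROM t", read="bigquery", write="duckdb")[0])
# e.g. SELECT ARRAY_AGG(x) FROM t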
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)
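For instance, BigQuery's three-argument ARRAY_TO_STRING, whose NULL-replacement argument has no direct DuckDB counterpart (output is approximate):

import sqlglot

sql = "SELECT ARRAY_TO_STRING(arr, ',', 'NA')"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# e.g. SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'NA')), ',')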
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
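A sketch of the redundant-group elision (the pattern and output are illustrative):

import sqlglot

print(sqlglot.transpile("SELECT REGEXP_SUBSTR(s, 'a(b)')", read="snowflake", write="duckdb")[0])
# e.g. SELECT REGEXP_EXTRACT(s, 'a(b)')  (the dialect-default group is not rendered)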
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
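For example, Spark's FORMAT_NUMBER with an integer precision maps onto DuckDB's fmt-style FORMAT function (output is approximate):

import sqlglot

print(sqlglot.transpile("SELECT FORMAT_NUMBER(col, 2)", read="spark", write="duckdb")[0])
# e.g. SELECT FORMAT('{:,.2f}', col)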
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql