sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)
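
# Illustrative sketch (not part of the module): the date-delta helpers above render
# date arithmetic as DuckDB's `+/- INTERVAL` syntax. Assuming sqlglot's public API,
# with output shown approximately:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT DATE_ADD('2020-01-01', INTERVAL 1 DAY)", read="bigquery", write="duckdb"
#   )
#   # roughly: ["SELECT CAST('2020-01-01' AS DATE) + INTERVAL '1' DAY"]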
def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse
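
# Illustrative sketch (not part of the module): `_build_generate_series` records end
# exclusivity because DuckDB's RANGE excludes the series end while GENERATE_SERIES
# includes it (see the range-functions docs linked above), e.g.
#
#   import sqlglot
#   sqlglot.parse_one("RANGE(0, 5)", read="duckdb")            # 0..4, is_end_exclusive=True
#   sqlglot.parse_one("GENERATE_SERIES(0, 5)", read="duckdb")  # 0..5, is_end_exclusive=False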
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
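
# Illustrative sketch (not part of the module): `_date_diff_sql` relies on
# `_implicit_datetime_cast` so string literals reach DATE_DIFF with a temporal type.
# Assuming sqlglot's public API, with output shown approximately:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT DATE_DIFF('day', '2020-01-01', '2020-02-01')", read="duckdb", write="duckdb"
#   )
#   # roughly: SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE))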
def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
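
# Illustrative sketch (not part of the module): assuming sqlglot's public API,
# BigQuery's GENERATE_DATE_ARRAY becomes a GENERATE_SERIES call that is cast back
# to a DATE array, approximately:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05', INTERVAL 1 DAY)",
#       read="bigquery", write="duckdb",
#   )
#   # roughly: SELECT CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE),
#   #          CAST('2020-01-05' AS DATE), INTERVAL '1' DAY) AS DATE[])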
class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
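
    # Illustrative sketch (not part of the module): the keyword aliases above mean
    # DuckDB-specific type names normalize to standard token types when parsed, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("CAST(x AS LOGICAL)", read="duckdb").sql()
    #   # roughly: 'CAST(x AS BOOLEAN)'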
    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }
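
        # Illustrative sketch (not part of the module): because SHOW is removed from the
        # tokenizer's COMMANDS and registered in STATEMENT_PARSERS above, it parses into
        # a structured exp.Show node rather than an opaque command, e.g.
        #
        #   import sqlglot
        #   sqlglot.parse_one("SHOW TABLES", read="duckdb")  # -> exp.Show(this='TABLES')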
        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")
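
        # Illustrative sketch (not part of the module): both DuckDB map constructors are
        # routed through _parse_map above, e.g.
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb")     # brace syntax -> exp.ToMap
        #   sqlglot.parse_one("SELECT MAP(['a'], [1])", read="duckdb")  # key/value lists -> exp.Map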
        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
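
        # Illustrative sketch (not part of the module): STAR_EXCEPT = "EXCLUDE" above means
        # star exceptions render with DuckDB's keyword. Assuming sqlglot's public API:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")
        #   # roughly: ['SELECT * EXCLUDE (a) FROM t']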
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }
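
        # Illustrative sketch (not part of the module): a typical TRANSFORMS entry at work,
        # assuming sqlglot's public API and approximate output:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT LOGICAL_OR(b) FROM t", read="bigquery", write="duckdb")
        #   # roughly: ['SELECT BOOL_OR(b) FROM t']  (via the exp.LogicalOr entry above)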
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }
        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
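
        # Illustrative sketch (not part of the module): the "safe" branch of strtodate_sql
        # above maps BigQuery's SAFE.PARSE_DATE onto TRY_STRPTIME, approximately:
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT SAFE.PARSE_DATE('%Y-%m-%d', s)", read="bigquery", write="duckdb"
        #   )
        #   # roughly: ["SELECT CAST(TRY_STRPTIME(s, '%Y-%m-%d') AS DATE)"]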
        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"
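
        # Illustrative note (not part of the module): for an ordered-set call such as
        # PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x), withingroup_sql above moves the
        # order key into the first argument slot, yielding a QUANTILE_CONT(x, 0.5 ...) shape.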
        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")
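
        # Illustrative note (not part of the module): per objectinsert_sql above, Snowflake's
        # OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1) builds the struct from scratch, rendering
        # roughly as STRUCT_PACK(k := 1) instead of STRUCT_INSERT.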
        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
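
# Illustrative sketch (not part of the module): end-to-end, the dialect is exercised
# through sqlglot's public API, e.g.
#
#   import sqlglot
#   sqlglot.parse_one("SELECT EPOCH_MS(1618088028295)", read="duckdb")
#   # parses into exp.UnixToTime(scale=MILLIS) via Parser.FUNCTIONS["EPOCH_MS"], and
#   # _unix_to_time_sql renders it back as EPOCH_MS(...) when writing DuckDB.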
return super().interval_sql(expression) 965 966 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 967 if isinstance(expression.parent, exp.UserDefinedFunction): 968 return self.sql(expression, "this") 969 return super().columndef_sql(expression, sep) 970 971 def join_sql(self, expression: exp.Join) -> str: 972 if ( 973 expression.side == "LEFT" 974 and not expression.args.get("on") 975 and isinstance(expression.this, exp.Unnest) 976 ): 977 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 978 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 979 return super().join_sql(expression.on(exp.true())) 980 981 return super().join_sql(expression) 982 983 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 984 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 985 if expression.args.get("is_end_exclusive"): 986 return rename_func("RANGE")(self, expression) 987 988 return self.function_fallback_sql(expression) 989 990 def countif_sql(self, expression: exp.CountIf) -> str: 991 if self.dialect.version >= Version("1.2"): 992 return self.function_fallback_sql(expression) 993 994 # https://github.com/tobymao/sqlglot/pull/4749 995 return count_if_to_sum(self, expression) 996 997 def bracket_sql(self, expression: exp.Bracket) -> str: 998 if self.dialect.version >= Version("1.2"): 999 return super().bracket_sql(expression) 1000 1001 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1002 this = expression.this 1003 if isinstance(this, exp.Array): 1004 this.replace(exp.paren(this)) 1005 1006 bracket = super().bracket_sql(expression) 1007 1008 if not expression.args.get("returns_list_for_maps"): 1009 if not this.type: 1010 from sqlglot.optimizer.annotate_types import annotate_types 1011 1012 this = annotate_types(this, dialect=self.dialect) 1013 1014 if this.is_type(exp.DataType.Type.MAP): 1015 bracket = f"({bracket})[1]" 1016 1017 return bracket 1018 1019 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1020 expression_sql = self.sql(expression, "expression") 1021 1022 func = expression.this 1023 if isinstance(func, exp.PERCENTILES): 1024 # Make the order key the first arg and slide the fraction to the right 1025 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1026 order_col = expression.find(exp.Ordered) 1027 if order_col: 1028 func.set("expression", func.this) 1029 func.set("this", order_col.this) 1030 1031 this = self.sql(expression, "this").rstrip(")") 1032 1033 return f"{this}{expression_sql})" 1034 1035 def length_sql(self, expression: exp.Length) -> str: 1036 arg = expression.this 1037 1038 # Dialects like BQ and Snowflake also accept binary values as args, so 1039 # DDB will attempt to infer the type or resort to case/when resolution 1040 if not expression.args.get("binary") or arg.is_string: 1041 return self.func("LENGTH", arg) 1042 1043 if not arg.type: 1044 from sqlglot.optimizer.annotate_types import annotate_types 1045 1046 arg = annotate_types(arg, dialect=self.dialect) 1047 1048 if arg.is_type(*exp.DataType.TEXT_TYPES): 1049 return self.func("LENGTH", arg) 1050 1051 # We need these casts to make duckdb's static type checker happy 1052 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1053 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1054 1055 case = ( 1056 exp.case(self.func("TYPEOF", arg)) 1057 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1058 .else_( 1059 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1060 ) # 
anonymous to break length_sql recursion 1061 ) 1062 1063 return self.sql(case) 1064 1065 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1066 this = expression.this 1067 key = expression.args.get("key") 1068 key_sql = key.name if isinstance(key, exp.Expression) else "" 1069 value_sql = self.sql(expression, "value") 1070 1071 kv_sql = f"{key_sql} := {value_sql}" 1072 1073 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1074 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1075 if isinstance(this, exp.Struct) and not this.expressions: 1076 return self.func("STRUCT_PACK", kv_sql) 1077 1078 return self.func("STRUCT_INSERT", this, kv_sql) 1079 1080 def unnest_sql(self, expression: exp.Unnest) -> str: 1081 explode_array = expression.args.get("explode_array") 1082 if explode_array: 1083 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1084 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1085 expression.expressions.append( 1086 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1087 ) 1088 1089 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1090 alias = expression.args.get("alias") 1091 if alias: 1092 expression.set("alias", None) 1093 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1094 1095 unnest_sql = super().unnest_sql(expression) 1096 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1097 return self.sql(select) 1098 1099 return super().unnest_sql(expression) 1100 1101 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1102 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1103 # DuckDB should render IGNORE NULLS only for the general-purpose 1104 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1105 return super().ignorenulls_sql(expression) 1106 1107 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1108 return self.sql(expression, "this") 1109 1110 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1111 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1112 # DuckDB should render RESPECT NULLS only for the general-purpose 1113 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1114 return super().respectnulls_sql(expression) 1115 1116 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1117 return self.sql(expression, "this") 1118 1119 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1120 this = self.sql(expression, "this") 1121 null_text = self.sql(expression, "null") 1122 1123 if null_text: 1124 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1125 1126 return self.func("ARRAY_TO_STRING", this, expression.expression) 1127 1128 @unsupported_args("position", "occurrence") 1129 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1130 group = expression.args.get("group") 1131 params = expression.args.get("parameters") 1132 1133 # Do not render group if there is no following argument, 1134 # and it's the default value for this dialect 1135 if ( 1136 not params 1137 and group 1138 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1139 ): 1140 group = None 1141 return self.func( 1142 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1143 ) 1144 1145 @unsupported_args("culture") 1146 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1147 fmt = expression.args.get("format") 1148 if fmt and fmt.is_int: 1149 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1150 1151 self.unsupported("Only integer formats are supported by NumberToStr") 1152 return self.function_fallback_sql(expression) 1153 1154 def autoincrementcolumnconstraint_sql(self, _) -> str: 1155 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1156 return ""
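Two of the transforms above, exercised through the public API. This is a minimal sketch; t and s are placeholder names, and the rendered SQL is indicative of the mapping rather than a guaranteed literal output.

    import sqlglot

    # exp.RegexpILike (Postgres ~*) is rendered as REGEXP_MATCHES(..., 'i')
    print(sqlglot.transpile("SELECT s ~* 'abc' FROM t", read="postgres", write="duckdb")[0])

    # exp.Rand (MySQL RAND) is renamed to RANDOM
    print(sqlglot.transpile("SELECT RAND()", read="mysql", write="duckdb")[0])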
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
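These two settings surface directly through the public transpile API; a minimal sketch (t and x are placeholders, and the outputs are indicative, since they may vary across sqlglot versions):

    import sqlglot

    # DuckDB keeps the default CONCAT semantics, where a NULL argument yields
    # NULL, so no COALESCE wrapping is needed when targeting DuckDB.
    print(sqlglot.transpile("SELECT CONCAT('a', NULL)", read="duckdb", write="duckdb")[0])

    # An explicit NULLS LAST should round-trip; dialects whose default NULL
    # ordering already matches may simply drop the clause.
    print(sqlglot.transpile("SELECT x FROM t ORDER BY x NULLS LAST", read="duckdb", write="duckdb")[0])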
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
Specifies the strategy according to which identifiers should be normalized.
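Two of these parse-time behaviors, sketched with the public helpers (x is a placeholder column; the printed forms are indicative):

    import sqlglot
    from sqlglot import exp

    # DuckDB allows underscores in number literals for readability.
    print(sqlglot.parse_one("SELECT 1_000_000", read="duckdb").sql(dialect="duckdb"))

    # In DuckDB, x::INT[3] parses as a cast to a fixed-size array type rather
    # than as a subscript over x.
    cast = sqlglot.parse_one("SELECT x::INT[3]", read="duckdb").find(exp.Cast)
    print(repr(cast.to))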
293 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 294 if isinstance(path, exp.Literal): 295 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 296 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 297 # This check ensures we'll avoid trying to parse these as JSON paths, which can 298 # either result in a noisy warning or in an invalid representation of the path. 299 path_text = path.name 300 if path_text.startswith("/") or "[#" in path_text: 301 return path 302 303 return super().to_json_path(path)
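For example, a DuckDB JSON pointer should pass through the round trip untouched instead of being parsed (and possibly warned about) as a JSONPath; a sketch with placeholder names j and t:

    import sqlglot

    # The path starts with '/', so to_json_path returns it as-is.
    print(sqlglot.transpile("SELECT j -> '/a/b' FROM t", read="duckdb", write="duckdb")[0])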
305 class Tokenizer(tokens.Tokenizer): 306 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 307 HEREDOC_STRINGS = ["$"] 308 309 HEREDOC_TAG_IS_IDENTIFIER = True 310 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 311 312 KEYWORDS = { 313 **tokens.Tokenizer.KEYWORDS, 314 "//": TokenType.DIV, 315 "**": TokenType.DSTAR, 316 "^@": TokenType.CARET_AT, 317 "@>": TokenType.AT_GT, 318 "<@": TokenType.LT_AT, 319 "ATTACH": TokenType.ATTACH, 320 "BINARY": TokenType.VARBINARY, 321 "BITSTRING": TokenType.BIT, 322 "BPCHAR": TokenType.TEXT, 323 "CHAR": TokenType.TEXT, 324 "CHARACTER VARYING": TokenType.TEXT, 325 "DETACH": TokenType.DETACH, 326 "EXCLUDE": TokenType.EXCEPT, 327 "LOGICAL": TokenType.BOOLEAN, 328 "ONLY": TokenType.ONLY, 329 "PIVOT_WIDER": TokenType.PIVOT, 330 "POSITIONAL": TokenType.POSITIONAL, 331 "SIGNED": TokenType.INT, 332 "STRING": TokenType.TEXT, 333 "SUMMARIZE": TokenType.SUMMARIZE, 334 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 335 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 336 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 337 "TIMESTAMP_US": TokenType.TIMESTAMP, 338 "UBIGINT": TokenType.UBIGINT, 339 "UINTEGER": TokenType.UINT, 340 "USMALLINT": TokenType.USMALLINT, 341 "UTINYINT": TokenType.UTINYINT, 342 "VARCHAR": TokenType.TEXT, 343 } 344 KEYWORDS.pop("/*+") 345 346 SINGLE_TOKENS = { 347 **tokens.Tokenizer.SINGLE_TOKENS, 348 "$": TokenType.PARAMETER, 349 } 350 351 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
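A couple of these token mappings, exercised end to end (a sketch; s is a placeholder column):

    import sqlglot
    from sqlglot import exp

    # '//' lexes as TokenType.DIV, which round-trips as DuckDB integer division.
    print(sqlglot.parse_one("SELECT 7 // 2", read="duckdb").sql(dialect="duckdb"))

    # '^@' lexes as TokenType.CARET_AT, which the parser maps to exp.StartsWith.
    print(repr(sqlglot.parse_one("SELECT s ^@ 'a'", read="duckdb").find(exp.StartsWith)))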
353 class Parser(parser.Parser): 354 BITWISE = { 355 **parser.Parser.BITWISE, 356 TokenType.TILDA: exp.RegexpLike, 357 } 358 BITWISE.pop(TokenType.CARET) 359 360 RANGE_PARSERS = { 361 **parser.Parser.RANGE_PARSERS, 362 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 363 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 364 } 365 366 EXPONENT = { 367 **parser.Parser.EXPONENT, 368 TokenType.CARET: exp.Pow, 369 TokenType.DSTAR: exp.Pow, 370 } 371 372 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 373 374 SHOW_PARSERS = { 375 "TABLES": _show_parser("TABLES"), 376 "ALL TABLES": _show_parser("ALL TABLES"), 377 } 378 379 FUNCTIONS = { 380 **parser.Parser.FUNCTIONS, 381 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 382 "ARRAY_SORT": exp.SortArray.from_arg_list, 383 "DATEDIFF": _build_date_diff, 384 "DATE_DIFF": _build_date_diff, 385 "DATE_TRUNC": date_trunc_to_time, 386 "DATETRUNC": date_trunc_to_time, 387 "DECODE": lambda args: exp.Decode( 388 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 389 ), 390 "EDITDIST3": exp.Levenshtein.from_arg_list, 391 "ENCODE": lambda args: exp.Encode( 392 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 393 ), 394 "EPOCH": exp.TimeToUnix.from_arg_list, 395 "EPOCH_MS": lambda args: exp.UnixToTime( 396 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 397 ), 398 "GENERATE_SERIES": _build_generate_series(), 399 "JSON": exp.ParseJSON.from_arg_list, 400 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 401 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 402 "LIST_HAS": exp.ArrayContains.from_arg_list, 403 "LIST_REVERSE_SORT": _build_sort_array_desc, 404 "LIST_SORT": exp.SortArray.from_arg_list, 405 "LIST_VALUE": lambda args: exp.Array(expressions=args), 406 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 407 "MAKE_TIMESTAMP": _build_make_timestamp, 408 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 409 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 410 "RANGE": _build_generate_series(end_exclusive=True), 411 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 412 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 413 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 414 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 415 this=seq_get(args, 0), 416 expression=seq_get(args, 1), 417 replacement=seq_get(args, 2), 418 modifiers=seq_get(args, 3), 419 ), 420 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 421 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 422 "STRING_SPLIT": exp.Split.from_arg_list, 423 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 424 "STRING_TO_ARRAY": exp.Split.from_arg_list, 425 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 426 "STRUCT_PACK": exp.Struct.from_arg_list, 427 "STR_SPLIT": exp.Split.from_arg_list, 428 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 429 "TIME_BUCKET": exp.DateBin.from_arg_list, 430 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 431 "UNNEST": exp.Explode.from_arg_list, 432 "XOR": binary_from_function(exp.BitwiseXor), 433 } 434 435 FUNCTIONS.pop("DATE_SUB") 436 FUNCTIONS.pop("GLOB") 437 438 FUNCTION_PARSERS = { 439 **parser.Parser.FUNCTION_PARSERS, 440 **dict.fromkeys( 441 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 442 ), 443 } 444 FUNCTION_PARSERS.pop("DECODE") 445 446 NO_PAREN_FUNCTION_PARSERS = { 447 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 448 "MAP": 
lambda self: self._parse_map(), 449 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 450 } 451 452 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 453 TokenType.SEMI, 454 TokenType.ANTI, 455 } 456 457 PLACEHOLDER_PARSERS = { 458 **parser.Parser.PLACEHOLDER_PARSERS, 459 TokenType.PARAMETER: lambda self: ( 460 self.expression(exp.Placeholder, this=self._prev.text) 461 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 462 else None 463 ), 464 } 465 466 TYPE_CONVERTERS = { 467 # https://duckdb.org/docs/sql/data_types/numeric 468 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 469 # https://duckdb.org/docs/sql/data_types/text 470 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 471 } 472 473 STATEMENT_PARSERS = { 474 **parser.Parser.STATEMENT_PARSERS, 475 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 476 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 477 TokenType.SHOW: lambda self: self._parse_show(), 478 } 479 480 def _parse_expression(self) -> t.Optional[exp.Expression]: 481 # DuckDB supports prefix aliases, e.g. foo: 1 482 if self._next and self._next.token_type == TokenType.COLON: 483 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 484 self._match(TokenType.COLON) 485 comments = self._prev_comments or [] 486 487 this = self._parse_assignment() 488 if isinstance(this, exp.Expression): 489 # Moves the comment next to the alias in `alias: expr /* comment */` 490 comments += this.pop_comments() or [] 491 492 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 493 494 return super()._parse_expression() 495 496 def _parse_table( 497 self, 498 schema: bool = False, 499 joins: bool = False, 500 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 501 parse_bracket: bool = False, 502 is_db_reference: bool = False, 503 parse_partition: bool = False, 504 ) -> t.Optional[exp.Expression]: 505 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 506 if self._next and self._next.token_type == TokenType.COLON: 507 alias = self._parse_table_alias( 508 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 509 ) 510 self._match(TokenType.COLON) 511 comments = self._prev_comments or [] 512 else: 513 alias = None 514 comments = [] 515 516 table = super()._parse_table( 517 schema=schema, 518 joins=joins, 519 alias_tokens=alias_tokens, 520 parse_bracket=parse_bracket, 521 is_db_reference=is_db_reference, 522 parse_partition=parse_partition, 523 ) 524 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 525 # Moves the comment next to the alias in `alias: table /* comment */` 526 comments += table.pop_comments() or [] 527 alias.comments = alias.pop_comments() + comments 528 table.set("alias", alias) 529 530 return table 531 532 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 533 # https://duckdb.org/docs/sql/samples.html 534 sample = super()._parse_table_sample(as_modifier=as_modifier) 535 if sample and not sample.args.get("method"): 536 if sample.args.get("size"): 537 sample.set("method", exp.var("RESERVOIR")) 538 else: 539 sample.set("method", exp.var("SYSTEM")) 540 541 return sample 542 543 def _parse_bracket( 544 self, this: t.Optional[exp.Expression] = None 545 ) -> t.Optional[exp.Expression]: 546 bracket = super()._parse_bracket(this) 547 548 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 549 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 550 bracket.set("returns_list_for_maps", True) 551 552 return bracket 553 554 def _parse_map(self) -> exp.ToMap | exp.Map: 555 if self._match(TokenType.L_BRACE, advance=False): 556 return self.expression(exp.ToMap, this=self._parse_bracket()) 557 558 args = self._parse_wrapped_csv(self._parse_assignment) 559 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 560 561 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 562 return self._parse_field_def() 563 564 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 565 if len(aggregations) == 1: 566 return super()._pivot_column_names(aggregations) 567 return pivot_column_names(aggregations, dialect="duckdb") 568 569 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 570 def _parse_attach_option() -> exp.AttachOption: 571 return self.expression( 572 exp.AttachOption, 573 this=self._parse_var(any_token=True), 574 expression=self._parse_field(any_token=True), 575 ) 576 577 self._match(TokenType.DATABASE) 578 exists = self._parse_exists(not_=is_attach) 579 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 580 581 if self._match(TokenType.L_PAREN, advance=False): 582 expressions = self._parse_wrapped_csv(_parse_attach_option) 583 else: 584 expressions = None 585 586 return ( 587 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 588 if is_attach 589 else self.expression(exp.Detach, this=this, exists=exists) 590 ) 591 592 def _parse_show_duckdb(self, this: str) -> exp.Show: 593 return self.expression(exp.Show, this=this)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
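The prefix-alias handling above can be sketched as follows; s, d and data are illustrative names, and on output the aliases should surface as ordinary trailing aliases:

    import sqlglot

    # DuckDB prefix aliases: `alias: expr` in projections, `alias: table` in FROM.
    e = sqlglot.parse_one("SELECT s: 1 + 2 FROM d: data", read="duckdb")
    print(e.sql(dialect="duckdb"))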
595 class Generator(generator.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
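A short sketch of the knobs most relevant here, plus one class-level setting from above (STAR_EXCEPT = "EXCLUDE"); the table and column names are placeholders:

    import sqlglot

    sql = "SELECT col FROM tbl WHERE col > 1"
    # pretty=True applies the documented pad/indent defaults; identify=True
    # quotes every identifier.
    print(sqlglot.transpile(sql, write="duckdb", pretty=True, identify=True)[0])

    # BigQuery's SELECT * EXCEPT (...) is rendered with DuckDB's EXCLUDE keyword.
    print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])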
895 def strtodate_sql(self, expression: exp.StrToDate) -> str: 896 if expression.args.get("safe"): 897 formatted_time = self.format_time(expression) 898 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 899 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
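So a "safe" conversion, e.g. Snowflake's TRY_TO_DATE, should land on TRY_STRPTIME; a hedged sketch (s is a placeholder column):

    import sqlglot

    # Expected shape: CAST(TRY_STRPTIME(s, '%Y-%m-%d') AS DATE)
    print(sqlglot.transpile("SELECT TRY_TO_DATE(s, 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])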
907 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 908 nano = expression.args.get("nano") 909 if nano is not None: 910 expression.set( 911 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 912 ) 913 914 return rename_func("MAKE_TIME")(self, expression)
916 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 917 sec = expression.args["sec"] 918 919 milli = expression.args.get("milli") 920 if milli is not None: 921 sec += milli.pop() / exp.Literal.number(1000.0) 922 923 nano = expression.args.get("nano") 924 if nano is not None: 925 sec += nano.pop() / exp.Literal.number(1000000000.0) 926 927 if milli or nano: 928 expression.set("sec", sec) 929 930 return rename_func("MAKE_TIMESTAMP")(self, expression)
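For instance, Snowflake's TIMESTAMP_FROM_PARTS with a nanosecond component should fold the nanos into the seconds argument before MAKE_TIMESTAMP is emitted (a sketch; the literal arithmetic in the output may be formatted differently):

    import sqlglot

    sql = "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 30, 15, 500000000)"
    # nano is popped and added to sec as nano / 1e9.
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])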
932 def tablesample_sql( 933 self, 934 expression: exp.TableSample, 935 tablesample_keyword: t.Optional[str] = None, 936 ) -> str: 937 if not isinstance(expression.parent, exp.Select): 938 # This sample clause only applies to a single source, not the entire resulting relation 939 tablesample_keyword = "TABLESAMPLE" 940 941 if expression.args.get("size"): 942 method = expression.args.get("method") 943 if method and method.name.upper() != "RESERVOIR": 944 self.unsupported( 945 f"Sampling method {method} is not supported with a discrete sample count, " 946 "defaulting to reservoir sampling" 947 ) 948 expression.set("method", exp.var("RESERVOIR")) 949 950 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
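A sketch of the size-based path, assuming sqlglot parses DuckDB's plain row-count sample form; the parser above already defaults the method to RESERVOIR for discrete sizes, and the generator warns about and rewrites any other method:

    import sqlglot

    # A discrete row count: the method defaults to (or is forced to) RESERVOIR.
    print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 10 ROWS", read="duckdb", write="duckdb")[0])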
952 def interval_sql(self, expression: exp.Interval) -> str: 953 multiplier: t.Optional[int] = None 954 unit = expression.text("unit").lower() 955 956 if unit.startswith("week"): 957 multiplier = 7 958 if unit.startswith("quarter"): 959 multiplier = 90 960 961 if multiplier: 962 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 963 964 return super().interval_sql(expression)
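Since the WEEK and QUARTER units are rewritten in terms of days, a sketch:

    import sqlglot

    # Expected shape: CURRENT_DATE + (7 * INTERVAL '2' DAY)
    print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL '2' WEEK", write="duckdb")[0])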
971 def join_sql(self, expression: exp.Join) -> str: 972 if ( 973 expression.side == "LEFT" 974 and not expression.args.get("on") 975 and isinstance(expression.this, exp.Unnest) 976 ): 977 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 978 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 979 return super().join_sql(expression.on(exp.true())) 980 981 return super().join_sql(expression)
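A sketch of the ON TRUE injection, reading BigQuery, where UNNEST joins need no ON clause (t.arr is a placeholder; the exact shape of the generated UNNEST may vary by version):

    import sqlglot

    sql = "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x"
    # DuckDB requires an ON clause here, so a dummy ON TRUE is added.
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])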
997 def bracket_sql(self, expression: exp.Bracket) -> str: 998 if self.dialect.version >= Version("1.2"): 999 return super().bracket_sql(expression) 1000 1001 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1002 this = expression.this 1003 if isinstance(this, exp.Array): 1004 this.replace(exp.paren(this)) 1005 1006 bracket = super().bracket_sql(expression) 1007 1008 if not expression.args.get("returns_list_for_maps"): 1009 if not this.type: 1010 from sqlglot.optimizer.annotate_types import annotate_types 1011 1012 this = annotate_types(this, dialect=self.dialect) 1013 1014 if this.is_type(exp.DataType.Type.MAP): 1015 bracket = f"({bracket})[1]" 1016 1017 return bracket
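The version gate can be exercised with sqlglot's versioned-dialect syntax, assuming your release supports the `version` setting in the dialect string; a sketch:

    import sqlglot

    sql = "SELECT MAP(['k'], [1])['k']"
    # Old DuckDB returned single-element lists for map subscripts, so the
    # bracket is expected to be wrapped and indexed with [1].
    print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.0")[0])
    # For >= 1.2 the bracket should be emitted unchanged.
    print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.2")[0])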
1019 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1020 expression_sql = self.sql(expression, "expression") 1021 1022 func = expression.this 1023 if isinstance(func, exp.PERCENTILES): 1024 # Make the order key the first arg and slide the fraction to the right 1025 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1026 order_col = expression.find(exp.Ordered) 1027 if order_col: 1028 func.set("expression", func.this) 1029 func.set("this", order_col.this) 1030 1031 this = self.sql(expression, "this").rstrip(")") 1032 1033 return f"{this}{expression_sql})"
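For example, a Postgres ordered-set aggregate should become DuckDB's two-argument quantile form; a sketch (t and x are placeholders):

    import sqlglot

    sql = "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t"
    # The order key becomes the first argument: QUANTILE_CONT(x, 0.5)
    print(sqlglot.transpile(sql, read="postgres", write="duckdb")[0])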
1035 def length_sql(self, expression: exp.Length) -> str: 1036 arg = expression.this 1037 1038 # Dialects like BQ and Snowflake also accept binary values as args, so 1039 # DDB will attempt to infer the type or resort to case/when resolution 1040 if not expression.args.get("binary") or arg.is_string: 1041 return self.func("LENGTH", arg) 1042 1043 if not arg.type: 1044 from sqlglot.optimizer.annotate_types import annotate_types 1045 1046 arg = annotate_types(arg, dialect=self.dialect) 1047 1048 if arg.is_type(*exp.DataType.TEXT_TYPES): 1049 return self.func("LENGTH", arg) 1050 1051 # We need these casts to make duckdb's static type checker happy 1052 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1053 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1054 1055 case = ( 1056 exp.case(self.func("TYPEOF", arg)) 1057 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1058 .else_( 1059 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1060 ) # anonymous to break length_sql recursion 1061 ) 1062 1063 return self.sql(case)
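A sketch of the fallback path, reading a dialect whose LENGTH also accepts binary values (b is a placeholder column with no schema, so its type can't be inferred):

    import sqlglot

    # Expected shape: CASE TYPEOF(b) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
    print(sqlglot.transpile("SELECT LENGTH(b) FROM t", read="bigquery", write="duckdb")[0])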
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
    # then we can generate STRUCT_PACK which will build it, since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
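A sketch of the empty-struct special case, reading from Snowflake (output indicative):

    import sqlglot

    # Inserting into an empty object maps to STRUCT_PACK, since
    # STRUCT_INSERT({}, key := value) is not valid DuckDB.
    sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])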
def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
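A sketch of the nested-array case, reading from BigQuery (output indicative):

    import sqlglot

    # UNNESTing a nested array in BigQuery explodes the outer level; DuckDB
    # emulates this with UNNEST(..., max_depth => 2) inside a subquery, and
    # the column alias becomes a table alias.
    sql = "SELECT * FROM UNNEST([[1, 2], [3]]) AS v"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])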
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
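Both hooks behave symmetrically; a sketch using IGNORE NULLS (output indicative):

    import sqlglot

    # IGNORE NULLS survives on general-purpose window functions such as
    # FIRST_VALUE, but is dropped with a warning anywhere else.
    sql = "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])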
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)
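A sketch of the null-replacement path, reading from BigQuery (output indicative):

    import sqlglot

    # The null-replacement argument becomes a LIST_TRANSFORM + COALESCE
    # wrapper around the input list before ARRAY_TO_STRING joins it.
    sql = "SELECT ARRAY_TO_STRING(['a', NULL, 'b'], ',', '?')"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])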
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None

    return self.func("REGEXP_EXTRACT", expression.this, expression.expression, group, params)
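A sketch of the elision on a DuckDB round trip, assuming group 0 is the dialect default (as it is for DuckDB's regexp_extract):

    import sqlglot

    # A trailing group argument equal to the dialect default is redundant
    # and gets dropped on the way out.
    sql = "SELECT REGEXP_EXTRACT(s, 'a(b)', 0) FROM t"
    print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])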
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
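A sketch using MySQL's two-argument FORMAT, which sqlglot parses into NumberToStr (output indicative):

    import sqlglot

    # An integer format (here the number of decimal places) maps onto a
    # Python-style format string for DuckDB's FORMAT function.
    sql = "SELECT FORMAT(12345.678, 2)"
    print(sqlglot.transpile(sql, read="mysql", write="duckdb")[0])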