sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms

from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_delta_to_binary_interval_op,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder
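# ---------------------------------------------------------------------------
# Illustrative note (not from the upstream source): the builders above are
# registered under DuckDB.Parser.FUNCTIONS further below, so they can be
# exercised through sqlglot's public API. A minimal sketch, assuming only a
# standard sqlglot installation:
#
#     import sqlglot
#
#     # DuckDB defaults an omitted series start to 0:
#     sqlglot.parse_one("GENERATE_SERIES(5)", read="duckdb")
#
#     # GENERATE_SERIES(a, b) is end-inclusive while RANGE(a, b) is
#     # end-exclusive; the "is_end_exclusive" flag set above preserves that
#     # distinction so each call round-trips to the correct function name:
#     sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")
# ---------------------------------------------------------------------------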
def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            if is_property_eq:
                if isinstance(expr.this, exp.Identifier):
                    key = self.sql(exp.Literal.string(expr.name))
                else:
                    key = self.sql(expr.this)
            else:
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))
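# ---------------------------------------------------------------------------
# Illustrative note (not from the upstream source): _unix_to_time_sql picks a
# DuckDB function by epoch scale, e.g. seconds -> TO_TIMESTAMP, milliseconds
# -> EPOCH_MS, microseconds -> MAKE_TIMESTAMP. A sketch of the round trip,
# assuming BigQuery's TIMESTAMP_MILLIS parses into exp.UnixToTime with a
# MILLIS scale (output shown is approximate):
#
#     import sqlglot
#     sqlglot.transpile("SELECT TIMESTAMP_MILLIS(x)", read="bigquery", write="duckdb")
#     # -> roughly ["SELECT EPOCH_MS(x)"]
# ---------------------------------------------------------------------------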
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is a TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to a DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
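# ---------------------------------------------------------------------------
# Illustrative note (not from the upstream source): a sketch of the
# GENERATE_DATE_ARRAY rewrite above, with literal endpoints receiving the
# implicit DATE casts (output shown is approximate):
#
#     import sqlglot
#     sqlglot.transpile(
#         "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05')",
#         read="bigquery",
#         write="duckdb",
#     )
#     # -> roughly ["SELECT CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE),
#     #    CAST('2020-01-05' AS DATE), INTERVAL '1' DAY) AS DATE[])"]
# ---------------------------------------------------------------------------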
class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "FORCE": TokenType.FORCE,
            "INSTALL": TokenType.INSTALL,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
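    # -----------------------------------------------------------------------
    # Illustrative note (not from the upstream source): the "$" SINGLE_TOKENS
    # entry above, together with the PLACEHOLDER_PARSERS entry in the Parser
    # below, lets both numbered and named DuckDB parameters round-trip. A
    # minimal sketch:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT $1, $name", read="duckdb")
    #     # -> both $1 and $name parse as exp.Placeholder nodes; the
    #     #    Generator's PARAMETER_TOKEN/NAMED_PLACEHOLDER_TOKEN ("$")
    #     #    render them back unchanged.
    # -----------------------------------------------------------------------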
    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
            "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "LIST_FILTER": exp.ArrayFilter.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_TRANSFORM": exp.Transform.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.FORCE: lambda self: self._parse_force(),
            TokenType.INSTALL: lambda self: self._parse_install(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }
        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))
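        # -------------------------------------------------------------------
        # Illustrative note (not from the upstream source): _parse_lambda and
        # _parse_expression above cover two DuckDB-specific syntaxes. A
        # minimal sketch of both (output shown is approximate):
        #
        #     import sqlglot
        #     # Python-style lambdas in list functions:
        #     sqlglot.transpile(
        #         "SELECT LIST_TRANSFORM(l, LAMBDA x: x + 1)",
        #         read="duckdb", write="duckdb",
        #     )
        #     # Prefix aliases, equivalent to `SELECT 1 AS foo`:
        #     sqlglot.parse_one("SELECT foo: 1", read="duckdb")
        # -------------------------------------------------------------------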
        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

        def _parse_force(self) -> exp.Install | exp.Command:
            # FORCE can only be followed by INSTALL or CHECKPOINT
            # In the case of CHECKPOINT, we fall back
            if not self._match(TokenType.INSTALL):
                return self._parse_as_command(self._prev)

            return self._parse_install(force=True)

        def _parse_install(self, force: bool = False) -> exp.Install:
            return self.expression(
                exp.Install,
                **{  # type: ignore
                    "this": self._parse_id_var(),
                    "from": self._parse_var_or_string() if self._match(TokenType.FROM) else None,
                    "force": force,
                },
            )

        def _parse_primary(self) -> t.Optional[exp.Expression]:
            if self._match_pair(TokenType.HASH, TokenType.NUMBER):
                return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

            return super()._parse_primary()

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True
        SUPPORTS_LIKE_QUANTIFIERS = False
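        # -------------------------------------------------------------------
        # Illustrative note (not from the upstream source): these flags steer
        # the shared generator, e.g. STAR_EXCEPT = "EXCLUDE" rewrites
        # BigQuery/Snowflake-style `SELECT * EXCEPT (...)`. A minimal sketch
        # (output shown is approximate):
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")
        #     # -> roughly ["SELECT * EXCLUDE (a) FROM t"]
        # -------------------------------------------------------------------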
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.ArrayUniqueAgg: lambda self, e: self.func(
                "LIST", exp.Distinct(expressions=[e.this])
            ),
            exp.BitwiseAndAgg: rename_func("BIT_AND"),
            exp.BitwiseOrAgg: rename_func("BIT_OR"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.BitwiseXorAgg: rename_func("BIT_XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: date_delta_to_binary_interval_op(),
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: date_delta_to_binary_interval_op(),
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: date_delta_to_binary_interval_op(),
            exp.DatetimeAdd: date_delta_to_binary_interval_op(),
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: date_delta_to_binary_interval_op(),
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: date_delta_to_binary_interval_op(),
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }
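        # -------------------------------------------------------------------
        # Illustrative note (not from the upstream source): identifiers that
        # collide with the reserved keywords above get quoted when DuckDB SQL
        # is generated. A minimal sketch (output shown is approximate):
        #
        #     from sqlglot import exp
        #     exp.column("user").sql(dialect="duckdb")
        #     # -> roughly '"user"'
        # -------------------------------------------------------------------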
        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def install_sql(self, expression: exp.Install) -> str:
            force = "FORCE " if expression.args.get("force") else ""
            this = self.sql(expression, "this")
            from_clause = expression.args.get("from")
            from_clause = f" FROM {from_clause}" if from_clause else ""
            return f"{force}INSTALL {this}{from_clause}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)
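        # -------------------------------------------------------------------
        # Illustrative note (not from the upstream source): the "safe"
        # variants above map forgiving functions onto DuckDB, e.g.
        # Snowflake's TRY_PARSE_JSON becomes a JSON_VALID guard. A minimal
        # sketch (output shown is approximate):
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT TRY_PARSE_JSON(x)", read="snowflake", write="duckdb")
        #     # -> roughly ["SELECT CASE WHEN JSON_VALID(x) THEN x ELSE NULL END"]
        # -------------------------------------------------------------------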
        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                not expression.args.get("using")
                and not expression.args.get("on")
                and not expression.method
                and (expression.kind in ("", "INNER", "OUTER"))
            ):
                # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                if isinstance(expression.this, exp.Unnest):
                    return super().join_sql(expression.on(exp.true()))

                expression.args.pop("side", None)
                expression.args.pop("kind", None)

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"
        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if isinstance(alias, exp.TableAlias):
                    expression.set("alias", None)
                    if alias.columns:
                        alias = exp.TableAlias(this=seq_get(alias.columns, 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            this = expression.this

            if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            if isinstance(this, exp.First):
                this = exp.AnyValue(this=this.this)

            if not isinstance(this, exp.AnyValue):
                self.unsupported("IGNORE NULLS is not supported for non-window functions.")

            return self.sql(this)
        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql
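        # -------------------------------------------------------------------
        # Illustrative note (not from the upstream source): a sketch of the
        # POSEXPLODE rewrite above, including the 1-indexed -> 0-indexed
        # position adjustment (output shown is approximate):
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT POSEXPLODE(a)", read="spark", write="duckdb")
        #     # -> roughly ["SELECT GENERATE_SUBSCRIPTS(a, 1) - 1 AS pos, UNNEST(a) AS col"]
        # -------------------------------------------------------------------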
        def addmonths_sql(self, expression: exp.AddMonths) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

            func = self.func(
                "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
            )

            # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE.
            # To match, for example, Snowflake's ADD_MONTHS behavior (which preserves the input type),
            # we need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ.
            # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
            if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
                return self.sql(exp.Cast(this=func, to=this.type))

            return self.sql(func)

        def format_sql(self, expression: exp.Format) -> str:
            if expression.name.lower() == "%s" and len(expression.expressions) == 1:
                return self.func("FORMAT", "'{}'", expression.expressions[0])

            return self.function_fallback_sql(expression)
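# ---------------------------------------------------------------------------
# Illustrative usage (not from the upstream source): a minimal end-to-end
# sketch of the dialect, converting a BigQuery STRUCT literal into DuckDB's
# brace syntax via _struct_sql (output shown is approximate):
#
#     import sqlglot
#     sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="bigquery", write="duckdb")
#     # -> roughly ["SELECT {'a': 1, 'b': 'x'}"]
# ---------------------------------------------------------------------------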
_generate_datetime_array_sql, 710 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 711 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 712 exp.Explode: rename_func("UNNEST"), 713 exp.IntDiv: lambda self, e: self.binary(e, "//"), 714 exp.IsInf: rename_func("ISINF"), 715 exp.IsNan: rename_func("ISNAN"), 716 exp.JSONBExists: rename_func("JSON_EXISTS"), 717 exp.JSONExtract: _arrow_json_extract_sql, 718 exp.JSONExtractArray: _json_extract_value_array_sql, 719 exp.JSONExtractScalar: _arrow_json_extract_sql, 720 exp.JSONFormat: _json_format_sql, 721 exp.JSONValueArray: _json_extract_value_array_sql, 722 exp.Lateral: explode_to_unnest_sql, 723 exp.LogicalOr: rename_func("BOOL_OR"), 724 exp.LogicalAnd: rename_func("BOOL_AND"), 725 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 726 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 727 exp.MonthsBetween: lambda self, e: self.func( 728 "DATEDIFF", 729 "'month'", 730 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 731 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 732 ), 733 exp.PercentileCont: rename_func("QUANTILE_CONT"), 734 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 735 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 736 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 737 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 738 exp.RegexpReplace: lambda self, e: self.func( 739 "REGEXP_REPLACE", 740 e.this, 741 e.expression, 742 e.args.get("replacement"), 743 e.args.get("modifiers"), 744 ), 745 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 746 exp.RegexpILike: lambda self, e: self.func( 747 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 748 ), 749 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 750 exp.Return: lambda self, e: self.sql(e, "this"), 751 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 752 exp.Rand: rename_func("RANDOM"), 753 exp.SHA: rename_func("SHA1"), 754 exp.SHA2: sha256_sql, 755 exp.Split: rename_func("STR_SPLIT"), 756 exp.SortArray: _sort_array_sql, 757 exp.StrPosition: strposition_sql, 758 exp.StrToUnix: lambda self, e: self.func( 759 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 760 ), 761 exp.Struct: _struct_sql, 762 exp.Transform: rename_func("LIST_TRANSFORM"), 763 exp.TimeAdd: date_delta_to_binary_interval_op(), 764 exp.Time: no_time_sql, 765 exp.TimeDiff: _timediff_sql, 766 exp.Timestamp: no_timestamp_sql, 767 exp.TimestampDiff: lambda self, e: self.func( 768 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 769 ), 770 exp.TimestampTrunc: timestamptrunc_sql(), 771 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 772 exp.TimeStrToTime: timestrtotime_sql, 773 exp.TimeStrToUnix: lambda self, e: self.func( 774 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 775 ), 776 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 777 exp.TimeToUnix: rename_func("EPOCH"), 778 exp.TsOrDiToDi: lambda self, 779 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 780 exp.TsOrDsAdd: date_delta_to_binary_interval_op(), 781 exp.TsOrDsDiff: lambda self, e: self.func( 782 "DATE_DIFF", 783 f"'{e.args.get('unit') or 'DAY'}'", 784 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 
785 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 786 ), 787 exp.UnixToStr: lambda self, e: self.func( 788 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 789 ), 790 exp.DatetimeTrunc: lambda self, e: self.func( 791 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 792 ), 793 exp.UnixToTime: _unix_to_time_sql, 794 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 795 exp.VariancePop: rename_func("VAR_POP"), 796 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 797 exp.Xor: bool_xor_sql, 798 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 799 rename_func("LEVENSHTEIN") 800 ), 801 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 802 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 803 exp.DateBin: rename_func("TIME_BUCKET"), 804 } 805 806 SUPPORTED_JSON_PATH_PARTS = { 807 exp.JSONPathKey, 808 exp.JSONPathRoot, 809 exp.JSONPathSubscript, 810 exp.JSONPathWildcard, 811 } 812 813 TYPE_MAPPING = { 814 **generator.Generator.TYPE_MAPPING, 815 exp.DataType.Type.BINARY: "BLOB", 816 exp.DataType.Type.BPCHAR: "TEXT", 817 exp.DataType.Type.CHAR: "TEXT", 818 exp.DataType.Type.DATETIME: "TIMESTAMP", 819 exp.DataType.Type.FLOAT: "REAL", 820 exp.DataType.Type.JSONB: "JSON", 821 exp.DataType.Type.NCHAR: "TEXT", 822 exp.DataType.Type.NVARCHAR: "TEXT", 823 exp.DataType.Type.UINT: "UINTEGER", 824 exp.DataType.Type.VARBINARY: "BLOB", 825 exp.DataType.Type.ROWVERSION: "BLOB", 826 exp.DataType.Type.VARCHAR: "TEXT", 827 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 828 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 829 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 830 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 831 } 832 833 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 834 RESERVED_KEYWORDS = { 835 "array", 836 "analyse", 837 "union", 838 "all", 839 "when", 840 "in_p", 841 "default", 842 "create_p", 843 "window", 844 "asymmetric", 845 "to", 846 "else", 847 "localtime", 848 "from", 849 "end_p", 850 "select", 851 "current_date", 852 "foreign", 853 "with", 854 "grant", 855 "session_user", 856 "or", 857 "except", 858 "references", 859 "fetch", 860 "limit", 861 "group_p", 862 "leading", 863 "into", 864 "collate", 865 "offset", 866 "do", 867 "then", 868 "localtimestamp", 869 "check_p", 870 "lateral_p", 871 "current_role", 872 "where", 873 "asc_p", 874 "placing", 875 "desc_p", 876 "user", 877 "unique", 878 "initially", 879 "column", 880 "both", 881 "some", 882 "as", 883 "any", 884 "only", 885 "deferrable", 886 "null_p", 887 "current_time", 888 "true_p", 889 "table", 890 "case", 891 "trailing", 892 "variadic", 893 "for", 894 "on", 895 "distinct", 896 "false_p", 897 "not", 898 "constraint", 899 "current_timestamp", 900 "returning", 901 "primary", 902 "intersect", 903 "having", 904 "analyze", 905 "current_user", 906 "and", 907 "cast", 908 "symmetric", 909 "using", 910 "order", 911 "current_catalog", 912 } 913 914 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 915 916 # DuckDB doesn't generally support CREATE TABLE .. properties 917 # https://duckdb.org/docs/sql/statements/create_table.html 918 PROPERTIES_LOCATION = { 919 prop: exp.Properties.Location.UNSUPPORTED 920 for prop in generator.Generator.PROPERTIES_LOCATION 921 } 922 923 # There are a few exceptions (e.g. 
temporary tables) which are supported or 924 # can be transpiled to DuckDB, so we explicitly override them accordingly 925 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 926 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 927 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 928 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 929 930 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 931 exp.FirstValue, 932 exp.Lag, 933 exp.LastValue, 934 exp.Lead, 935 exp.NthValue, 936 ) 937 938 def lambda_sql( 939 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 940 ) -> str: 941 if expression.args.get("colon"): 942 prefix = "LAMBDA " 943 arrow_sep = ":" 944 wrap = False 945 else: 946 prefix = "" 947 948 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 949 return f"{prefix}{lambda_sql}" 950 951 def show_sql(self, expression: exp.Show) -> str: 952 return f"SHOW {expression.name}" 953 954 def install_sql(self, expression: exp.Install) -> str: 955 force = "FORCE " if expression.args.get("force") else "" 956 this = self.sql(expression, "this") 957 from_clause = expression.args.get("from") 958 from_clause = f" FROM {from_clause}" if from_clause else "" 959 return f"{force}INSTALL {this}{from_clause}" 960 961 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 962 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 963 964 def strtotime_sql(self, expression: exp.StrToTime) -> str: 965 if expression.args.get("safe"): 966 formatted_time = self.format_time(expression) 967 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 968 return str_to_time_sql(self, expression) 969 970 def strtodate_sql(self, expression: exp.StrToDate) -> str: 971 if expression.args.get("safe"): 972 formatted_time = self.format_time(expression) 973 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 974 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 975 976 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 977 arg = expression.this 978 if expression.args.get("safe"): 979 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 980 return self.func("JSON", arg) 981 982 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 983 nano = expression.args.get("nano") 984 if nano is not None: 985 expression.set( 986 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 987 ) 988 989 return rename_func("MAKE_TIME")(self, expression) 990 991 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 992 sec = expression.args["sec"] 993 994 milli = expression.args.get("milli") 995 if milli is not None: 996 sec += milli.pop() / exp.Literal.number(1000.0) 997 998 nano = expression.args.get("nano") 999 if nano is not None: 1000 sec += nano.pop() / exp.Literal.number(1000000000.0) 1001 1002 if milli or nano: 1003 expression.set("sec", sec) 1004 1005 return rename_func("MAKE_TIMESTAMP")(self, expression) 1006 1007 def tablesample_sql( 1008 self, 1009 expression: exp.TableSample, 1010 tablesample_keyword: t.Optional[str] = None, 1011 ) -> str: 1012 if not isinstance(expression.parent, exp.Select): 1013 # This sample clause only applies to a single source, not the entire resulting relation 1014 tablesample_keyword = "TABLESAMPLE" 1015 1016 if expression.args.get("size"): 
1017 method = expression.args.get("method") 1018 if method and method.name.upper() != "RESERVOIR": 1019 self.unsupported( 1020 f"Sampling method {method} is not supported with a discrete sample count, " 1021 "defaulting to reservoir sampling" 1022 ) 1023 expression.set("method", exp.var("RESERVOIR")) 1024 1025 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 1026 1027 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 1028 if isinstance(expression.parent, exp.UserDefinedFunction): 1029 return self.sql(expression, "this") 1030 return super().columndef_sql(expression, sep) 1031 1032 def join_sql(self, expression: exp.Join) -> str: 1033 if ( 1034 not expression.args.get("using") 1035 and not expression.args.get("on") 1036 and not expression.method 1037 and (expression.kind in ("", "INNER", "OUTER")) 1038 ): 1039 # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause 1040 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1041 if isinstance(expression.this, exp.Unnest): 1042 return super().join_sql(expression.on(exp.true())) 1043 1044 expression.args.pop("side", None) 1045 expression.args.pop("kind", None) 1046 1047 return super().join_sql(expression) 1048 1049 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1050 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1051 if expression.args.get("is_end_exclusive"): 1052 return rename_func("RANGE")(self, expression) 1053 1054 return self.function_fallback_sql(expression) 1055 1056 def countif_sql(self, expression: exp.CountIf) -> str: 1057 if self.dialect.version >= Version("1.2"): 1058 return self.function_fallback_sql(expression) 1059 1060 # https://github.com/tobymao/sqlglot/pull/4749 1061 return count_if_to_sum(self, expression) 1062 1063 def bracket_sql(self, expression: exp.Bracket) -> str: 1064 if self.dialect.version >= Version("1.2"): 1065 return super().bracket_sql(expression) 1066 1067 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1068 this = expression.this 1069 if isinstance(this, exp.Array): 1070 this.replace(exp.paren(this)) 1071 1072 bracket = super().bracket_sql(expression) 1073 1074 if not expression.args.get("returns_list_for_maps"): 1075 if not this.type: 1076 from sqlglot.optimizer.annotate_types import annotate_types 1077 1078 this = annotate_types(this, dialect=self.dialect) 1079 1080 if this.is_type(exp.DataType.Type.MAP): 1081 bracket = f"({bracket})[1]" 1082 1083 return bracket 1084 1085 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1086 expression_sql = self.sql(expression, "expression") 1087 1088 func = expression.this 1089 if isinstance(func, exp.PERCENTILES): 1090 # Make the order key the first arg and slide the fraction to the right 1091 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1092 order_col = expression.find(exp.Ordered) 1093 if order_col: 1094 func.set("expression", func.this) 1095 func.set("this", order_col.this) 1096 1097 this = self.sql(expression, "this").rstrip(")") 1098 1099 return f"{this}{expression_sql})" 1100 1101 def length_sql(self, expression: exp.Length) -> str: 1102 arg = expression.this 1103 1104 # Dialects like BQ and Snowflake also accept binary values as args, so 1105 # DDB will attempt to infer the type or resort to case/when resolution 1106 if not expression.args.get("binary") or arg.is_string: 1107 return self.func("LENGTH", arg) 1108 1109 if not arg.type: 1110 from 
sqlglot.optimizer.annotate_types import annotate_types 1111 1112 arg = annotate_types(arg, dialect=self.dialect) 1113 1114 if arg.is_type(*exp.DataType.TEXT_TYPES): 1115 return self.func("LENGTH", arg) 1116 1117 # We need these casts to make duckdb's static type checker happy 1118 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1119 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1120 1121 case = ( 1122 exp.case(self.func("TYPEOF", arg)) 1123 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1124 .else_( 1125 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1126 ) # anonymous to break length_sql recursion 1127 ) 1128 1129 return self.sql(case) 1130 1131 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1132 this = expression.this 1133 key = expression.args.get("key") 1134 key_sql = key.name if isinstance(key, exp.Expression) else "" 1135 value_sql = self.sql(expression, "value") 1136 1137 kv_sql = f"{key_sql} := {value_sql}" 1138 1139 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1140 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1141 if isinstance(this, exp.Struct) and not this.expressions: 1142 return self.func("STRUCT_PACK", kv_sql) 1143 1144 return self.func("STRUCT_INSERT", this, kv_sql) 1145 1146 def unnest_sql(self, expression: exp.Unnest) -> str: 1147 explode_array = expression.args.get("explode_array") 1148 if explode_array: 1149 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1150 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1151 expression.expressions.append( 1152 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1153 ) 1154 1155 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1156 alias = expression.args.get("alias") 1157 if isinstance(alias, exp.TableAlias): 1158 expression.set("alias", None) 1159 if alias.columns: 1160 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1161 1162 unnest_sql = super().unnest_sql(expression) 1163 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1164 return self.sql(select) 1165 1166 return super().unnest_sql(expression) 1167 1168 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1169 this = expression.this 1170 1171 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1172 # DuckDB should render IGNORE NULLS only for the general-purpose 1173 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1174 return super().ignorenulls_sql(expression) 1175 1176 if isinstance(this, exp.First): 1177 this = exp.AnyValue(this=this.this) 1178 1179 if not isinstance(this, exp.AnyValue): 1180 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1181 1182 return self.sql(this) 1183 1184 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1185 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1186 # DuckDB should render RESPECT NULLS only for the general-purpose 1187 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1188 return super().respectnulls_sql(expression) 1189 1190 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1191 return self.sql(expression, "this") 1192 1193 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1194 this = self.sql(expression, "this") 1195 null_text = self.sql(expression, "null") 1196 1197 if null_text: 1198 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1199 1200 return self.func("ARRAY_TO_STRING", this, expression.expression) 1201 1202 @unsupported_args("position", "occurrence") 1203 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1204 group = expression.args.get("group") 1205 params = expression.args.get("parameters") 1206 1207 # Do not render group if there is no following argument, 1208 # and it's the default value for this dialect 1209 if ( 1210 not params 1211 and group 1212 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1213 ): 1214 group = None 1215 return self.func( 1216 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1217 ) 1218 1219 @unsupported_args("culture") 1220 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1221 fmt = expression.args.get("format") 1222 if fmt and fmt.is_int: 1223 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1224 1225 self.unsupported("Only integer formats are supported by NumberToStr") 1226 return self.function_fallback_sql(expression) 1227 1228 def autoincrementcolumnconstraint_sql(self, _) -> str: 1229 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1230 return "" 1231 1232 def aliases_sql(self, expression: exp.Aliases) -> str: 1233 this = expression.this 1234 if isinstance(this, exp.Posexplode): 1235 return self.posexplode_sql(this) 1236 1237 return super().aliases_sql(expression) 1238 1239 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1240 this = expression.this 1241 parent = expression.parent 1242 1243 # The default Spark aliases are "pos" and "col", unless specified otherwise 1244 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1245 1246 if isinstance(parent, exp.Aliases): 1247 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1248 pos, col = parent.expressions 1249 elif isinstance(parent, exp.Table): 1250 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1251 alias = parent.args.get("alias") 1252 if alias: 1253 pos, col = alias.columns or [pos, col] 1254 alias.pop() 1255 1256 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1257 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1258 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1259 gen_subscripts = self.sql( 1260 exp.Alias( 1261 this=exp.Anonymous( 1262 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1263 ) 1264 - exp.Literal.number(1), 1265 alias=pos, 1266 ) 1267 ) 1268 1269 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1270 1271 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1272 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1273 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1274 1275 return posexplode_sql 1276 1277 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1278 this = expression.this 1279 1280 if not this.type: 1281 from sqlglot.optimizer.annotate_types import annotate_types 1282 1283 this = 
annotate_types(this, dialect=self.dialect) 1284 1285 if this.is_type(*exp.DataType.TEXT_TYPES): 1286 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1287 1288 func = self.func( 1289 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1290 ) 1291 1292 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1293 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1294 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1295 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1296 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1297 return self.sql(exp.Cast(this=func, to=this.type)) 1298 1299 return self.sql(func) 1300 1301 def format_sql(self, expression: exp.Format) -> str: 1302 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1303 return self.func("FORMAT", "'{}'", expression.expressions[0]) 1304 1305 return self.function_fallback_sql(expression)
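To see the round-trip behavior end to end, here is a minimal sketch using sqlglot's public transpile API (results are printed rather than asserted, since the generated SQL can vary across sqlglot versions):

import sqlglot

# RANGE(a, b) is end-exclusive while GENERATE_SERIES(a, b) is end-inclusive;
# the parser records this on the expression (is_end_exclusive) and
# generateseries_sql picks the matching function name on the way back out.
print(sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")[0])
print(sqlglot.transpile("SELECT GENERATE_SERIES(1, 5)", read="duckdb", write="duckdb")[0])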
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
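As a concrete illustration of the CONCAT behavior, here is a minimal sketch; the exact SQL sqlglot renders to compensate for the differing NULL semantics is version-dependent, so the results are only printed:

import sqlglot

# Dialects disagree on whether a NULL argument makes CONCAT return NULL
# (e.g. MySQL) or is treated as an empty string (e.g. DuckDB); transpiling
# the same call between the two shows how sqlglot rewrites the arguments
# to preserve the source semantics.
print(sqlglot.transpile("SELECT CONCAT('a', NULL)", read="mysql", write="duckdb")[0])
print(sqlglot.transpile("SELECT CONCAT('a', NULL)", read="duckdb", write="mysql")[0])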
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
Specifies the strategy according to which identifiers should be normalized.
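For the normalization strategy, here is a minimal sketch using sqlglot's normalize_identifiers helper; the normalized casing printed below depends on the DuckDB dialect's current strategy and may differ across versions:

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Normalize unquoted identifiers according to the DuckDB dialect's
# normalization strategy, then re-render the query.
expression = parse_one("SELECT FooBar FROM MyTable", read="duckdb")
print(normalize_identifiers(expression, dialect="duckdb").sql("duckdb"))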
275 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 276 if isinstance(path, exp.Literal): 277 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 278 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 279 # This check ensures we'll avoid trying to parse these as JSON paths, which can 280 # either result in a noisy warning or in an invalid representation of the path. 281 path_text = path.name 282 if path_text.startswith("/") or "[#" in path_text: 283 return path 284 285 return super().to_json_path(path)
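A minimal sketch of the path handling described above; a pointer-style path (leading "/") should survive the round trip as a plain literal instead of being parsed as a JSONPath:

import sqlglot

# '->' is DuckDB's JSON extraction operator. Because the path below starts
# with '/', to_json_path returns the literal untouched rather than parsing
# it, avoiding both the noisy warning and a lossy JSONPath representation.
print(sqlglot.transpile("SELECT data -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])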
287 class Tokenizer(tokens.Tokenizer): 288 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 289 HEREDOC_STRINGS = ["$"] 290 291 HEREDOC_TAG_IS_IDENTIFIER = True 292 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 293 294 KEYWORDS = { 295 **tokens.Tokenizer.KEYWORDS, 296 "//": TokenType.DIV, 297 "**": TokenType.DSTAR, 298 "^@": TokenType.CARET_AT, 299 "@>": TokenType.AT_GT, 300 "<@": TokenType.LT_AT, 301 "ATTACH": TokenType.ATTACH, 302 "BINARY": TokenType.VARBINARY, 303 "BITSTRING": TokenType.BIT, 304 "BPCHAR": TokenType.TEXT, 305 "CHAR": TokenType.TEXT, 306 "DATETIME": TokenType.TIMESTAMPNTZ, 307 "DETACH": TokenType.DETACH, 308 "FORCE": TokenType.FORCE, 309 "INSTALL": TokenType.INSTALL, 310 "LOGICAL": TokenType.BOOLEAN, 311 "ONLY": TokenType.ONLY, 312 "PIVOT_WIDER": TokenType.PIVOT, 313 "POSITIONAL": TokenType.POSITIONAL, 314 "RESET": TokenType.COMMAND, 315 "SIGNED": TokenType.INT, 316 "STRING": TokenType.TEXT, 317 "SUMMARIZE": TokenType.SUMMARIZE, 318 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 319 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 320 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 321 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 322 "TIMESTAMP_US": TokenType.TIMESTAMP, 323 "UBIGINT": TokenType.UBIGINT, 324 "UINTEGER": TokenType.UINT, 325 "USMALLINT": TokenType.USMALLINT, 326 "UTINYINT": TokenType.UTINYINT, 327 "VARCHAR": TokenType.TEXT, 328 } 329 KEYWORDS.pop("/*+") 330 331 SINGLE_TOKENS = { 332 **tokens.Tokenizer.SINGLE_TOKENS, 333 "$": TokenType.PARAMETER, 334 } 335 336 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
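A small sketch exercising a couple of the DuckDB-specific token mappings above (the printed tree reprs are version-dependent):

from sqlglot import parse_one

# '//' is tokenized as integer division (TokenType.DIV, parsed to exp.IntDiv)
# and '**' as exponentiation (TokenType.DSTAR, parsed to exp.Pow).
print(repr(parse_one("SELECT 7 // 2", read="duckdb")))
print(repr(parse_one("SELECT 2 ** 10", read="duckdb")))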
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
338 class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
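A minimal usage sketch for these arguments, constructing the DuckDB parser directly (the malformed input is only illustrative; the exact error text varies by version):

from sqlglot.dialects.duckdb import DuckDB
from sqlglot.errors import ErrorLevel, ParseError

dialect = DuckDB()
sql = "SELECT 1 +"  # intentionally malformed

try:
    # RAISE collects errors and raises once parsing finishes, capped here
    # at three messages, instead of the default immediate behavior.
    tokens = dialect.tokenize(sql)
    dialect.parser(error_level=ErrorLevel.RAISE, max_errors=3).parse(tokens, sql)
except ParseError as e:
    print(e)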
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
637 class Generator(generator.Generator): 638 PARAMETER_TOKEN = "$" 639 NAMED_PLACEHOLDER_TOKEN = "$" 640 JOIN_HINTS = False 641 TABLE_HINTS = False 642 QUERY_HINTS = False 643 LIMIT_FETCH = "LIMIT" 644 STRUCT_DELIMITER = ("(", ")") 645 RENAME_TABLE_WITH_DB = False 646 NVL2_SUPPORTED = False 647 SEMI_ANTI_JOIN_WITH_SIDE = False 648 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 649 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 650 LAST_DAY_SUPPORTS_DATE_PART = False 651 JSON_KEY_VALUE_PAIR_SEP = "," 652 IGNORE_NULLS_IN_FUNC = True 653 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 654 SUPPORTS_CREATE_TABLE_LIKE = False 655 MULTI_ARG_DISTINCT = False 656 CAN_IMPLEMENT_ARRAY_ANY = True 657 SUPPORTS_TO_NUMBER = False 658 SUPPORTS_WINDOW_EXCLUDE = True 659 COPY_HAS_INTO_KEYWORD = False 660 STAR_EXCEPT = "EXCLUDE" 661 PAD_FILL_PATTERN_IS_REQUIRED = True 662 ARRAY_CONCAT_IS_VAR_LEN = False 663 ARRAY_SIZE_DIM_REQUIRED = False 664 NORMALIZE_EXTRACT_DATE_PARTS = True 665 SUPPORTS_LIKE_QUANTIFIERS = False 666 667 TRANSFORMS = { 668 **generator.Generator.TRANSFORMS, 669 exp.ApproxDistinct: approx_count_distinct_sql, 670 exp.Array: inline_array_unless_query, 671 exp.ArrayFilter: rename_func("LIST_FILTER"), 672 exp.ArrayRemove: remove_from_array_using_filter, 673 exp.ArraySort: _array_sort_sql, 674 exp.ArraySum: rename_func("LIST_SUM"), 675 exp.ArrayUniqueAgg: lambda self, e: self.func( 676 "LIST", exp.Distinct(expressions=[e.this]) 677 ), 678 exp.BitwiseAndAgg: rename_func("BIT_AND"), 679 exp.BitwiseOrAgg: rename_func("BIT_OR"), 680 exp.BitwiseXor: rename_func("XOR"), 681 exp.BitwiseXorAgg: rename_func("BIT_XOR"), 682 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 683 exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"), 684 exp.CurrentDate: lambda *_: "CURRENT_DATE", 685 exp.CurrentTime: lambda *_: "CURRENT_TIME", 686 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 687 exp.DayOfMonth: rename_func("DAYOFMONTH"), 688 exp.DayOfWeek: rename_func("DAYOFWEEK"), 689 exp.DayOfWeekIso: rename_func("ISODOW"), 690 exp.DayOfYear: rename_func("DAYOFYEAR"), 691 exp.DataType: _datatype_sql, 692 exp.Date: _date_sql, 693 exp.DateAdd: date_delta_to_binary_interval_op(), 694 exp.DateFromParts: rename_func("MAKE_DATE"), 695 exp.DateSub: date_delta_to_binary_interval_op(), 696 exp.DateDiff: _date_diff_sql, 697 exp.DateStrToDate: datestrtodate_sql, 698 exp.Datetime: no_datetime_sql, 699 exp.DatetimeSub: date_delta_to_binary_interval_op(), 700 exp.DatetimeAdd: date_delta_to_binary_interval_op(), 701 exp.DateToDi: lambda self, 702 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 703 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 704 exp.DiToDate: lambda self, 705 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 706 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 707 exp.EuclideanDistance: rename_func("LIST_DISTANCE"), 708 exp.GenerateDateArray: _generate_datetime_array_sql, 709 exp.GenerateTimestampArray: _generate_datetime_array_sql, 710 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 711 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 712 exp.Explode: rename_func("UNNEST"), 713 exp.IntDiv: lambda self, e: self.binary(e, "//"), 714 exp.IsInf: rename_func("ISINF"), 715 exp.IsNan: rename_func("ISNAN"), 716 exp.JSONBExists: rename_func("JSON_EXISTS"), 717 exp.JSONExtract: _arrow_json_extract_sql, 718 
exp.JSONExtractArray: _json_extract_value_array_sql, 719 exp.JSONExtractScalar: _arrow_json_extract_sql, 720 exp.JSONFormat: _json_format_sql, 721 exp.JSONValueArray: _json_extract_value_array_sql, 722 exp.Lateral: explode_to_unnest_sql, 723 exp.LogicalOr: rename_func("BOOL_OR"), 724 exp.LogicalAnd: rename_func("BOOL_AND"), 725 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 726 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 727 exp.MonthsBetween: lambda self, e: self.func( 728 "DATEDIFF", 729 "'month'", 730 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 731 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 732 ), 733 exp.PercentileCont: rename_func("QUANTILE_CONT"), 734 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 735 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 736 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 737 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 738 exp.RegexpReplace: lambda self, e: self.func( 739 "REGEXP_REPLACE", 740 e.this, 741 e.expression, 742 e.args.get("replacement"), 743 e.args.get("modifiers"), 744 ), 745 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 746 exp.RegexpILike: lambda self, e: self.func( 747 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 748 ), 749 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 750 exp.Return: lambda self, e: self.sql(e, "this"), 751 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 752 exp.Rand: rename_func("RANDOM"), 753 exp.SHA: rename_func("SHA1"), 754 exp.SHA2: sha256_sql, 755 exp.Split: rename_func("STR_SPLIT"), 756 exp.SortArray: _sort_array_sql, 757 exp.StrPosition: strposition_sql, 758 exp.StrToUnix: lambda self, e: self.func( 759 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 760 ), 761 exp.Struct: _struct_sql, 762 exp.Transform: rename_func("LIST_TRANSFORM"), 763 exp.TimeAdd: date_delta_to_binary_interval_op(), 764 exp.Time: no_time_sql, 765 exp.TimeDiff: _timediff_sql, 766 exp.Timestamp: no_timestamp_sql, 767 exp.TimestampDiff: lambda self, e: self.func( 768 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 769 ), 770 exp.TimestampTrunc: timestamptrunc_sql(), 771 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 772 exp.TimeStrToTime: timestrtotime_sql, 773 exp.TimeStrToUnix: lambda self, e: self.func( 774 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 775 ), 776 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 777 exp.TimeToUnix: rename_func("EPOCH"), 778 exp.TsOrDiToDi: lambda self, 779 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 780 exp.TsOrDsAdd: date_delta_to_binary_interval_op(), 781 exp.TsOrDsDiff: lambda self, e: self.func( 782 "DATE_DIFF", 783 f"'{e.args.get('unit') or 'DAY'}'", 784 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 785 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 786 ), 787 exp.UnixToStr: lambda self, e: self.func( 788 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 789 ), 790 exp.DatetimeTrunc: lambda self, e: self.func( 791 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 792 ), 793 exp.UnixToTime: _unix_to_time_sql, 794 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 795 exp.VariancePop: 
rename_func("VAR_POP"), 796 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 797 exp.Xor: bool_xor_sql, 798 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 799 rename_func("LEVENSHTEIN") 800 ), 801 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 802 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 803 exp.DateBin: rename_func("TIME_BUCKET"), 804 } 805 806 SUPPORTED_JSON_PATH_PARTS = { 807 exp.JSONPathKey, 808 exp.JSONPathRoot, 809 exp.JSONPathSubscript, 810 exp.JSONPathWildcard, 811 } 812 813 TYPE_MAPPING = { 814 **generator.Generator.TYPE_MAPPING, 815 exp.DataType.Type.BINARY: "BLOB", 816 exp.DataType.Type.BPCHAR: "TEXT", 817 exp.DataType.Type.CHAR: "TEXT", 818 exp.DataType.Type.DATETIME: "TIMESTAMP", 819 exp.DataType.Type.FLOAT: "REAL", 820 exp.DataType.Type.JSONB: "JSON", 821 exp.DataType.Type.NCHAR: "TEXT", 822 exp.DataType.Type.NVARCHAR: "TEXT", 823 exp.DataType.Type.UINT: "UINTEGER", 824 exp.DataType.Type.VARBINARY: "BLOB", 825 exp.DataType.Type.ROWVERSION: "BLOB", 826 exp.DataType.Type.VARCHAR: "TEXT", 827 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 828 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 829 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 830 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 831 } 832 833 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 834 RESERVED_KEYWORDS = { 835 "array", 836 "analyse", 837 "union", 838 "all", 839 "when", 840 "in_p", 841 "default", 842 "create_p", 843 "window", 844 "asymmetric", 845 "to", 846 "else", 847 "localtime", 848 "from", 849 "end_p", 850 "select", 851 "current_date", 852 "foreign", 853 "with", 854 "grant", 855 "session_user", 856 "or", 857 "except", 858 "references", 859 "fetch", 860 "limit", 861 "group_p", 862 "leading", 863 "into", 864 "collate", 865 "offset", 866 "do", 867 "then", 868 "localtimestamp", 869 "check_p", 870 "lateral_p", 871 "current_role", 872 "where", 873 "asc_p", 874 "placing", 875 "desc_p", 876 "user", 877 "unique", 878 "initially", 879 "column", 880 "both", 881 "some", 882 "as", 883 "any", 884 "only", 885 "deferrable", 886 "null_p", 887 "current_time", 888 "true_p", 889 "table", 890 "case", 891 "trailing", 892 "variadic", 893 "for", 894 "on", 895 "distinct", 896 "false_p", 897 "not", 898 "constraint", 899 "current_timestamp", 900 "returning", 901 "primary", 902 "intersect", 903 "having", 904 "analyze", 905 "current_user", 906 "and", 907 "cast", 908 "symmetric", 909 "using", 910 "order", 911 "current_catalog", 912 } 913 914 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 915 916 # DuckDB doesn't generally support CREATE TABLE .. properties 917 # https://duckdb.org/docs/sql/statements/create_table.html 918 PROPERTIES_LOCATION = { 919 prop: exp.Properties.Location.UNSUPPORTED 920 for prop in generator.Generator.PROPERTIES_LOCATION 921 } 922 923 # There are a few exceptions (e.g. 
        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def install_sql(self, expression: exp.Install) -> str:
            force = "FORCE " if expression.args.get("force") else ""
            this = self.sql(expression, "this")
            from_clause = expression.args.get("from")
            from_clause = f" FROM {from_clause}" if from_clause else ""
            return f"{force}INSTALL {this}{from_clause}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)
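        # Illustrative sketch (not part of the upstream source): when StrToDate /
        # StrToTime carry the "safe" flag (e.g. a TRY_-style parse transpiled from
        # another dialect), the methods above emit TRY_STRPTIME so that unparseable
        # inputs yield NULL instead of an error, roughly:
        #
        #   CAST(TRY_STRPTIME(col, '%Y-%m-%d') AS DATE)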
        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                not expression.args.get("using")
                and not expression.args.get("on")
                and not expression.method
                and (expression.kind in ("", "INNER", "OUTER"))
            ):
                # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                if isinstance(expression.this, exp.Unnest):
                    return super().join_sql(expression.on(exp.true()))

                expression.args.pop("side", None)
                expression.args.pop("kind", None)

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"
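        # Illustrative sketch (not part of the upstream source): countif_sql and
        # bracket_sql above gate on the configured target version. Before DuckDB
        # 1.2, indexing a MAP returned a list, so the generator wraps the bracket:
        #
        #   m[k]         -- DuckDB >= 1.2
        #   (m[k])[1]    -- DuckDB <  1.2, extracting the single value from the list
        #
        # and COUNT_IF(cond) is rewritten via count_if_to_sum into a
        # SUM(CASE WHEN cond THEN 1 ELSE 0 END)-style expression.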
        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if isinstance(alias, exp.TableAlias):
                    expression.set("alias", None)
                    if alias.columns:
                        alias = exp.TableAlias(this=seq_get(alias.columns, 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            this = expression.this

            if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            if isinstance(this, exp.First):
                this = exp.AnyValue(this=this.this)

            if not isinstance(this, exp.AnyValue):
                self.unsupported("IGNORE NULLS is not supported for non-window functions.")

            return self.sql(this)
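        # Illustrative sketch (not part of the upstream source): per unnest_sql
        # above, BigQuery's UNNEST over a nested array becomes a subquery with an
        # explicit max_depth in DuckDB, roughly:
        #
        #   FROM UNNEST(arr) AS a                            -- BigQuery, arr nested
        #   FROM (SELECT UNNEST(arr, max_depth => 2)) AS a   -- DuckDB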
        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql
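        # Illustrative sketch (not part of the upstream source): posexplode_sql
        # above translates Spark's POSEXPLODE into GENERATE_SUBSCRIPTS + UNNEST,
        # subtracting 1 to move from DuckDB's 1-based to Spark's 0-based positions:
        #
        #   SELECT POSEXPLODE(arr) FROM t                                             -- Spark
        #   SELECT GENERATE_SUBSCRIPTS(arr, 1) - 1 AS pos, UNNEST(arr) AS col FROM t  -- DuckDB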
        def addmonths_sql(self, expression: exp.AddMonths) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

            func = self.func(
                "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
            )

            # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
            # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
            # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
            # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
            if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
                return self.sql(exp.Cast(this=func, to=this.type))

            return self.sql(func)

        def format_sql(self, expression: exp.Format) -> str:
            if expression.name.lower() == "%s" and len(expression.expressions) == 1:
                return self.func("FORMAT", "'{}'", expression.expressions[0])

            return self.function_fallback_sql(expression)
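A minimal end-to-end sketch of driving this dialect through sqlglot's public API (the calls below are the documented top-level entry points; the exact output strings may differ slightly between sqlglot versions):

import sqlglot

# Spark's POSEXPLODE is rewritten by posexplode_sql above into
# GENERATE_SUBSCRIPTS(...) - 1 plus UNNEST(...).
print(sqlglot.transpile("SELECT POSEXPLODE(arr) FROM t", read="spark", write="duckdb")[0])

# Parse DuckDB SQL into an AST and generate it back with this Generator.
expression = sqlglot.parse_one("SELECT EPOCH(CAST(ts AS TIMESTAMP)) FROM t", read="duckdb")
print(expression.sql(dialect="duckdb"))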
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
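For instance, these options can be passed straight through sqlglot's top-level API (a sketch; the pretty-printed output below is approximate):

import sqlglot

sql = "select col from tbl where col > 1"
print(sqlglot.transpile(sql, write="duckdb", pretty=True, identify=True)[0])
# SELECT
#   "col"
# FROM "tbl"
# WHERE
#   "col" > 1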
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql