sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)
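
# Illustrative usage (not part of the module source): _date_delta_sql renders date
# arithmetic as `<expr> +/- INTERVAL`. Going through sqlglot's public transpile API,
# a BigQuery DATE_ADD should come out roughly as follows:
#
#     import sqlglot
#     sqlglot.transpile(
#         "SELECT DATE_ADD(d, INTERVAL 1 DAY)", read="bigquery", write="duckdb"
#     )
#     # expected (roughly): ["SELECT d + INTERVAL '1' DAY"]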


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"
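
# Illustrative usage (not part of the module source): because STRUCT_PACK parses to
# exp.Struct and _struct_sql renders keyed fields with brace syntax, a round trip
# should look roughly like:
#
#     import sqlglot
#     sqlglot.transpile("SELECT STRUCT_PACK(a := 1)", read="duckdb", write="duckdb")
#     # expected (roughly): ["SELECT {'a': 1}"]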
self.func("EPOCH_MS", timestamp) 213 if scale == exp.UnixToTime.MICROS: 214 return self.func("MAKE_TIMESTAMP", timestamp) 215 216 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 217 218 219WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 220 221 222def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 223 arrow_sql = arrow_json_extract_sql(self, expression) 224 if not expression.same_parent and isinstance( 225 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 226 ): 227 arrow_sql = self.wrap(arrow_sql) 228 return arrow_sql 229 230 231def _implicit_datetime_cast( 232 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 233) -> t.Optional[exp.Expression]: 234 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 235 236 237def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 238 this = _implicit_datetime_cast(expression.this) 239 expr = _implicit_datetime_cast(expression.expression) 240 241 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 242 243 244def _generate_datetime_array_sql( 245 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 246) -> str: 247 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 248 249 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 250 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 251 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 252 253 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 254 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 255 start=start, end=end, step=expression.args.get("step") 256 ) 257 258 if is_generate_date_array: 259 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 260 # GENERATE_DATE_ARRAY we must cast it back to DATE array 261 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 262 263 return self.sql(gen_series) 264 265 266def _json_extract_value_array_sql( 267 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 268) -> str: 269 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 270 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 271 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 272 273 274class DuckDB(Dialect): 275 NULL_ORDERING = "nulls_are_last" 276 SUPPORTS_USER_DEFINED_TYPES = False 277 SAFE_DIVISION = True 278 INDEX_OFFSET = 1 279 CONCAT_COALESCE = True 280 SUPPORTS_ORDER_BY_ALL = True 281 SUPPORTS_FIXED_SIZE_ARRAYS = True 282 STRICT_JSON_PATH_SYNTAX = False 283 284 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 285 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 286 287 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 288 if isinstance(path, exp.Literal): 289 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 290 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 291 # This check ensures we'll avoid trying to parse these as JSON paths, which can 292 # either result in a noisy warning or in an invalid representation of the path. 


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}
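
        # Illustrative usage (not part of the module source): the EXPONENT entries above
        # parse both `^` and `**` into exp.Pow, which falls back to the POWER function
        # when generated, so roughly:
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT 2 ** 3", read="duckdb", write="duckdb")
        #     # expected (roughly): ["SELECT POWER(2, 3)"]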

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")
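
        # Illustrative usage (not part of the module source): GENERATE_SERIES and RANGE
        # both build exp.GenerateSeries, with `is_end_exclusive` preserving RANGE's
        # half-open semantics so the generator can emit the right function again:
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT RANGE(0, 10)", read="duckdb", write="duckdb")
        #     # expected (roughly): ["SELECT RANGE(0, 10)"]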

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
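
        # Illustrative usage (not part of the module source): the STAR_EXCEPT flag above
        # makes DuckDB spell BigQuery's `SELECT * EXCEPT` as `EXCLUDE`, e.g. roughly:
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")
        #     # expected (roughly): ["SELECT * EXCLUDE (a) FROM t"]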

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
        }
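
        # Illustrative usage (not part of the module source): exp.JSONExtract goes through
        # _arrow_json_extract_sql, so JSON access renders with arrow syntax, roughly:
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT JSON_EXTRACT(j, '$.x')", read="duckdb", write="duckdb")
        #     # expected (roughly): ["SELECT j -> '$.x'"]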

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
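
        # Illustrative usage (not part of the module source): per TYPE_MAPPING above,
        # character types collapse to TEXT when generating DuckDB, e.g. roughly:
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT CAST(x AS VARCHAR)", read="duckdb", write="duckdb")
        #     # expected (roughly): ["SELECT CAST(x AS TEXT)"]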

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)
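
        # Illustrative usage (not part of the module source): interval_sql rewrites WEEK
        # and QUARTER units as day multiples, so roughly:
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT INTERVAL 1 WEEK", read="duckdb", write="duckdb")
        #     # expected (roughly): ["SELECT (7 * INTERVAL '1' DAY)"]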

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)
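
        # Illustrative usage (not part of the module source): withingroup_sql reorders
        # ordered-set aggregates, so a WITHIN GROUP percentile from e.g. Postgres should
        # become a plain QUANTILE_CONT call, roughly:
        #
        #     import sqlglot
        #     sqlglot.transpile(
        #         "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x)",
        #         read="postgres",
        #         write="duckdb",
        #     )
        #     # expected (roughly): ["SELECT QUANTILE_CONT(x, 0.5)"]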

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value)
            # from Snowflake, we generate STRUCT_PACK to build it, since STRUCT_INSERT({}, key := value)
            # is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )
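
# Illustrative usage (not part of the module source): arraytostring_sql substitutes a
# NULL replacement via LIST_TRANSFORM before joining, so BigQuery's three-argument
# ARRAY_TO_STRING should transpile roughly as:
#
#     import sqlglot
#     sqlglot.transpile("SELECT ARRAY_TO_STRING(a, ',', '-')", read="bigquery", write="duckdb")
#     # expected (roughly): ["SELECT ARRAY_TO_STRING(LIST_TRANSFORM(a, x -> COALESCE(x, '-')), ',')"]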
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
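A small, hedged illustration, assuming the flag is exposed as CONCAT_COALESCE as in recent sqlglot versions:

from sqlglot.dialects.duckdb import DuckDB

# DuckDB's CONCAT function skips NULL arguments (unlike the || operator), so
# the dialect records that here; the printed value is version-dependent.
print(DuckDB.CONCAT_COALESCE)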
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
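For example, the following round-trip should preserve the clause (a sketch; assumes sqlglot is installed):

import sqlglot

# ORDER BY ALL sorts by every projected column, left to right.
sql = "SELECT a, b FROM t ORDER BY ALL"
print(sqlglot.parse_one(sql, read="duckdb").sql(dialect="duckdb"))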
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, the [5] should instead be interpreted as a subscript/index operator.
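A quick parse-side comparison (a sketch; the exact AST shapes may vary by version):

import sqlglot

# DuckDB reads INT[5] as a fixed-size array type, so the whole expression is a
# single cast; Snowflake reads [5] as a subscript applied to x::INT.
print(repr(sqlglot.parse_one("x::INT[5]", read="duckdb")))
print(repr(sqlglot.parse_one("x::INT[5]", read="snowflake")))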
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
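A sketch, assuming the strategy is exposed as NORMALIZATION_STRATEGY (the NormalizationStrategy enum is imported at the top of this module):

from sqlglot.dialects.duckdb import DuckDB

# DuckDB resolves unquoted identifiers case-insensitively.
print(DuckDB.NORMALIZATION_STRATEGY)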
288 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 289 if isinstance(path, exp.Literal): 290 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 291 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 292 # This check ensures we'll avoid trying to parse these as JSON paths, which can 293 # either result in a noisy warning or in an invalid representation of the path. 294 path_text = path.name 295 if path_text.startswith("/") or "[#" in path_text: 296 return path 297 298 return super().to_json_path(path)
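An illustrative round-trip (hedged; output may vary by version): a leading-slash JSON pointer is kept as a plain string rather than parsed as a JSONPath, per the check above.

import sqlglot

# The '/a/b' pointer should survive untouched instead of triggering a JSONPath parse.
print(sqlglot.transpile("SELECT col -> '/a/b'", read="duckdb", write="duckdb"))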
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
300 class Tokenizer(tokens.Tokenizer): 301 HEREDOC_STRINGS = ["$"] 302 303 HEREDOC_TAG_IS_IDENTIFIER = True 304 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 305 306 KEYWORDS = { 307 **tokens.Tokenizer.KEYWORDS, 308 "//": TokenType.DIV, 309 "**": TokenType.DSTAR, 310 "^@": TokenType.CARET_AT, 311 "@>": TokenType.AT_GT, 312 "<@": TokenType.LT_AT, 313 "ATTACH": TokenType.ATTACH, 314 "BINARY": TokenType.VARBINARY, 315 "BITSTRING": TokenType.BIT, 316 "BPCHAR": TokenType.TEXT, 317 "CHAR": TokenType.TEXT, 318 "CHARACTER VARYING": TokenType.TEXT, 319 "DETACH": TokenType.DETACH, 320 "EXCLUDE": TokenType.EXCEPT, 321 "LOGICAL": TokenType.BOOLEAN, 322 "ONLY": TokenType.ONLY, 323 "PIVOT_WIDER": TokenType.PIVOT, 324 "POSITIONAL": TokenType.POSITIONAL, 325 "SIGNED": TokenType.INT, 326 "STRING": TokenType.TEXT, 327 "SUMMARIZE": TokenType.SUMMARIZE, 328 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 329 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 330 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 331 "TIMESTAMP_US": TokenType.TIMESTAMP, 332 "UBIGINT": TokenType.UBIGINT, 333 "UINTEGER": TokenType.UINT, 334 "USMALLINT": TokenType.USMALLINT, 335 "UTINYINT": TokenType.UTINYINT, 336 "VARCHAR": TokenType.TEXT, 337 } 338 KEYWORDS.pop("/*+") 339 340 SINGLE_TOKENS = { 341 **tokens.Tokenizer.SINGLE_TOKENS, 342 "$": TokenType.PARAMETER, 343 }
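A short tokenization sketch showing two of the overrides above ('//' as integer division, '**' as exponentiation); assumes sqlglot's top-level tokenize helper:

import sqlglot

for token in sqlglot.tokenize("SELECT 7 // 2, 2 ** 3", read="duckdb"):
    # token_type reflects the DuckDB-specific KEYWORDS/SINGLE_TOKENS mappings
    print(token.token_type, token.text)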
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
345 class Parser(parser.Parser): 346 BITWISE = { 347 **parser.Parser.BITWISE, 348 TokenType.TILDA: exp.RegexpLike, 349 } 350 BITWISE.pop(TokenType.CARET) 351 352 RANGE_PARSERS = { 353 **parser.Parser.RANGE_PARSERS, 354 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 355 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 356 } 357 358 EXPONENT = { 359 **parser.Parser.EXPONENT, 360 TokenType.CARET: exp.Pow, 361 TokenType.DSTAR: exp.Pow, 362 } 363 364 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 365 366 FUNCTIONS = { 367 **parser.Parser.FUNCTIONS, 368 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 369 "ARRAY_SORT": exp.SortArray.from_arg_list, 370 "DATEDIFF": _build_date_diff, 371 "DATE_DIFF": _build_date_diff, 372 "DATE_TRUNC": date_trunc_to_time, 373 "DATETRUNC": date_trunc_to_time, 374 "DECODE": lambda args: exp.Decode( 375 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 376 ), 377 "ENCODE": lambda args: exp.Encode( 378 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 379 ), 380 "EPOCH": exp.TimeToUnix.from_arg_list, 381 "EPOCH_MS": lambda args: exp.UnixToTime( 382 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 383 ), 384 "JSON": exp.ParseJSON.from_arg_list, 385 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 386 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 387 "LIST_HAS": exp.ArrayContains.from_arg_list, 388 "LIST_REVERSE_SORT": _build_sort_array_desc, 389 "LIST_SORT": exp.SortArray.from_arg_list, 390 "LIST_VALUE": lambda args: exp.Array(expressions=args), 391 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 392 "MAKE_TIMESTAMP": _build_make_timestamp, 393 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 394 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 395 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 396 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 397 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 398 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 399 this=seq_get(args, 0), 400 expression=seq_get(args, 1), 401 replacement=seq_get(args, 2), 402 modifiers=seq_get(args, 3), 403 ), 404 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 405 "STRING_SPLIT": exp.Split.from_arg_list, 406 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 407 "STRING_TO_ARRAY": exp.Split.from_arg_list, 408 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 409 "STRUCT_PACK": exp.Struct.from_arg_list, 410 "STR_SPLIT": exp.Split.from_arg_list, 411 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 412 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 413 "UNNEST": exp.Explode.from_arg_list, 414 "XOR": binary_from_function(exp.BitwiseXor), 415 "GENERATE_SERIES": _build_generate_series(), 416 "RANGE": _build_generate_series(end_exclusive=True), 417 "EDITDIST3": exp.Levenshtein.from_arg_list, 418 } 419 420 FUNCTIONS.pop("DATE_SUB") 421 FUNCTIONS.pop("GLOB") 422 423 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 424 FUNCTION_PARSERS.pop("DECODE") 425 426 NO_PAREN_FUNCTION_PARSERS = { 427 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 428 "MAP": lambda self: self._parse_map(), 429 } 430 431 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 432 TokenType.SEMI, 433 TokenType.ANTI, 434 } 435 436 PLACEHOLDER_PARSERS = { 437 **parser.Parser.PLACEHOLDER_PARSERS, 438 TokenType.PARAMETER: lambda self: ( 439 self.expression(exp.Placeholder, this=self._prev.text) 440 if self._match(TokenType.NUMBER) or 
self._match_set(self.ID_VAR_TOKENS) 441 else None 442 ), 443 } 444 445 TYPE_CONVERTERS = { 446 # https://duckdb.org/docs/sql/data_types/numeric 447 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 448 # https://duckdb.org/docs/sql/data_types/text 449 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 450 } 451 452 STATEMENT_PARSERS = { 453 **parser.Parser.STATEMENT_PARSERS, 454 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 455 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 456 } 457 458 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 459 # https://duckdb.org/docs/sql/samples.html 460 sample = super()._parse_table_sample(as_modifier=as_modifier) 461 if sample and not sample.args.get("method"): 462 if sample.args.get("size"): 463 sample.set("method", exp.var("RESERVOIR")) 464 else: 465 sample.set("method", exp.var("SYSTEM")) 466 467 return sample 468 469 def _parse_bracket( 470 self, this: t.Optional[exp.Expression] = None 471 ) -> t.Optional[exp.Expression]: 472 bracket = super()._parse_bracket(this) 473 if isinstance(bracket, exp.Bracket): 474 bracket.set("returns_list_for_maps", True) 475 476 return bracket 477 478 def _parse_map(self) -> exp.ToMap | exp.Map: 479 if self._match(TokenType.L_BRACE, advance=False): 480 return self.expression(exp.ToMap, this=self._parse_bracket()) 481 482 args = self._parse_wrapped_csv(self._parse_assignment) 483 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 484 485 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 486 return self._parse_field_def() 487 488 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 489 if len(aggregations) == 1: 490 return super()._pivot_column_names(aggregations) 491 return pivot_column_names(aggregations, dialect="duckdb") 492 493 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 494 def _parse_attach_option() -> exp.AttachOption: 495 return self.expression( 496 exp.AttachOption, 497 this=self._parse_var(any_token=True), 498 expression=self._parse_field(any_token=True), 499 ) 500 501 self._match(TokenType.DATABASE) 502 exists = self._parse_exists(not_=is_attach) 503 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 504 505 if self._match(TokenType.L_PAREN, advance=False): 506 expressions = self._parse_wrapped_csv(_parse_attach_option) 507 else: 508 expressions = None 509 510 return ( 511 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 512 if is_attach 513 else self.expression(exp.Detach, this=this, exists=exists) 514 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
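Two parser behaviors specific to this dialect, as a hedged sketch (round-trip output may vary by version): brace-style MAP literals handled by _parse_map, and $-placeholders handled via PLACEHOLDER_PARSERS.

import sqlglot

print(sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb").sql(dialect="duckdb"))
print(sqlglot.parse_one("SELECT $1", read="duckdb").sql(dialect="duckdb"))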
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
516 class Generator(generator.Generator): 517 PARAMETER_TOKEN = "$" 518 NAMED_PLACEHOLDER_TOKEN = "$" 519 JOIN_HINTS = False 520 TABLE_HINTS = False 521 QUERY_HINTS = False 522 LIMIT_FETCH = "LIMIT" 523 STRUCT_DELIMITER = ("(", ")") 524 RENAME_TABLE_WITH_DB = False 525 NVL2_SUPPORTED = False 526 SEMI_ANTI_JOIN_WITH_SIDE = False 527 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 528 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 529 LAST_DAY_SUPPORTS_DATE_PART = False 530 JSON_KEY_VALUE_PAIR_SEP = "," 531 IGNORE_NULLS_IN_FUNC = True 532 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 533 SUPPORTS_CREATE_TABLE_LIKE = False 534 MULTI_ARG_DISTINCT = False 535 CAN_IMPLEMENT_ARRAY_ANY = True 536 SUPPORTS_TO_NUMBER = False 537 COPY_HAS_INTO_KEYWORD = False 538 STAR_EXCEPT = "EXCLUDE" 539 PAD_FILL_PATTERN_IS_REQUIRED = True 540 ARRAY_CONCAT_IS_VAR_LEN = False 541 ARRAY_SIZE_DIM_REQUIRED = False 542 543 TRANSFORMS = { 544 **generator.Generator.TRANSFORMS, 545 exp.ApproxDistinct: approx_count_distinct_sql, 546 exp.Array: inline_array_unless_query, 547 exp.ArrayFilter: rename_func("LIST_FILTER"), 548 exp.ArraySort: _array_sort_sql, 549 exp.ArraySum: rename_func("LIST_SUM"), 550 exp.BitwiseXor: rename_func("XOR"), 551 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 552 exp.CurrentDate: lambda *_: "CURRENT_DATE", 553 exp.CurrentTime: lambda *_: "CURRENT_TIME", 554 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 555 exp.DayOfMonth: rename_func("DAYOFMONTH"), 556 exp.DayOfWeek: rename_func("DAYOFWEEK"), 557 exp.DayOfWeekIso: rename_func("ISODOW"), 558 exp.DayOfYear: rename_func("DAYOFYEAR"), 559 exp.DataType: _datatype_sql, 560 exp.Date: _date_sql, 561 exp.DateAdd: _date_delta_sql, 562 exp.DateFromParts: rename_func("MAKE_DATE"), 563 exp.DateSub: _date_delta_sql, 564 exp.DateDiff: _date_diff_sql, 565 exp.DateStrToDate: datestrtodate_sql, 566 exp.Datetime: no_datetime_sql, 567 exp.DatetimeSub: _date_delta_sql, 568 exp.DatetimeAdd: _date_delta_sql, 569 exp.DateToDi: lambda self, 570 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 571 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 572 exp.DiToDate: lambda self, 573 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 574 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 575 exp.GenerateDateArray: _generate_datetime_array_sql, 576 exp.GenerateTimestampArray: _generate_datetime_array_sql, 577 exp.Explode: rename_func("UNNEST"), 578 exp.IntDiv: lambda self, e: self.binary(e, "//"), 579 exp.IsInf: rename_func("ISINF"), 580 exp.IsNan: rename_func("ISNAN"), 581 exp.JSONBExists: rename_func("JSON_EXISTS"), 582 exp.JSONExtract: _arrow_json_extract_sql, 583 exp.JSONExtractArray: _json_extract_value_array_sql, 584 exp.JSONExtractScalar: _arrow_json_extract_sql, 585 exp.JSONFormat: _json_format_sql, 586 exp.JSONValueArray: _json_extract_value_array_sql, 587 exp.Lateral: explode_to_unnest_sql, 588 exp.LogicalOr: rename_func("BOOL_OR"), 589 exp.LogicalAnd: rename_func("BOOL_AND"), 590 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 591 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 592 exp.MonthsBetween: lambda self, e: self.func( 593 "DATEDIFF", 594 "'month'", 595 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 596 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 597 ), 598 exp.PercentileCont: rename_func("QUANTILE_CONT"), 599 exp.PercentileDisc: 
rename_func("QUANTILE_DISC"), 600 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 601 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 602 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 603 exp.RegexpReplace: lambda self, e: self.func( 604 "REGEXP_REPLACE", 605 e.this, 606 e.expression, 607 e.args.get("replacement"), 608 e.args.get("modifiers"), 609 ), 610 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 611 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 612 exp.Return: lambda self, e: self.sql(e, "this"), 613 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 614 exp.Rand: rename_func("RANDOM"), 615 exp.SafeDivide: no_safe_divide_sql, 616 exp.SHA: rename_func("SHA1"), 617 exp.SHA2: sha256_sql, 618 exp.Split: rename_func("STR_SPLIT"), 619 exp.SortArray: _sort_array_sql, 620 exp.StrPosition: str_position_sql, 621 exp.StrToUnix: lambda self, e: self.func( 622 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 623 ), 624 exp.Struct: _struct_sql, 625 exp.Transform: rename_func("LIST_TRANSFORM"), 626 exp.TimeAdd: _date_delta_sql, 627 exp.Time: no_time_sql, 628 exp.TimeDiff: _timediff_sql, 629 exp.Timestamp: no_timestamp_sql, 630 exp.TimestampDiff: lambda self, e: self.func( 631 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 632 ), 633 exp.TimestampTrunc: timestamptrunc_sql(), 634 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 635 exp.TimeStrToTime: timestrtotime_sql, 636 exp.TimeStrToUnix: lambda self, e: self.func( 637 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 638 ), 639 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 640 exp.TimeToUnix: rename_func("EPOCH"), 641 exp.TsOrDiToDi: lambda self, 642 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 643 exp.TsOrDsAdd: _date_delta_sql, 644 exp.TsOrDsDiff: lambda self, e: self.func( 645 "DATE_DIFF", 646 f"'{e.args.get('unit') or 'DAY'}'", 647 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 648 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 649 ), 650 exp.UnixToStr: lambda self, e: self.func( 651 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 652 ), 653 exp.DatetimeTrunc: lambda self, e: self.func( 654 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 655 ), 656 exp.UnixToTime: _unix_to_time_sql, 657 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 658 exp.VariancePop: rename_func("VAR_POP"), 659 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 660 exp.Xor: bool_xor_sql, 661 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 662 rename_func("LEVENSHTEIN") 663 ), 664 } 665 666 SUPPORTED_JSON_PATH_PARTS = { 667 exp.JSONPathKey, 668 exp.JSONPathRoot, 669 exp.JSONPathSubscript, 670 exp.JSONPathWildcard, 671 } 672 673 TYPE_MAPPING = { 674 **generator.Generator.TYPE_MAPPING, 675 exp.DataType.Type.BINARY: "BLOB", 676 exp.DataType.Type.BPCHAR: "TEXT", 677 exp.DataType.Type.CHAR: "TEXT", 678 exp.DataType.Type.DATETIME: "TIMESTAMP", 679 exp.DataType.Type.FLOAT: "REAL", 680 exp.DataType.Type.NCHAR: "TEXT", 681 exp.DataType.Type.NVARCHAR: "TEXT", 682 exp.DataType.Type.UINT: "UINTEGER", 683 exp.DataType.Type.VARBINARY: "BLOB", 684 exp.DataType.Type.ROWVERSION: "BLOB", 685 exp.DataType.Type.VARCHAR: "TEXT", 686 
exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 687 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 688 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 689 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 690 } 691 692 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 693 RESERVED_KEYWORDS = { 694 "array", 695 "analyse", 696 "union", 697 "all", 698 "when", 699 "in_p", 700 "default", 701 "create_p", 702 "window", 703 "asymmetric", 704 "to", 705 "else", 706 "localtime", 707 "from", 708 "end_p", 709 "select", 710 "current_date", 711 "foreign", 712 "with", 713 "grant", 714 "session_user", 715 "or", 716 "except", 717 "references", 718 "fetch", 719 "limit", 720 "group_p", 721 "leading", 722 "into", 723 "collate", 724 "offset", 725 "do", 726 "then", 727 "localtimestamp", 728 "check_p", 729 "lateral_p", 730 "current_role", 731 "where", 732 "asc_p", 733 "placing", 734 "desc_p", 735 "user", 736 "unique", 737 "initially", 738 "column", 739 "both", 740 "some", 741 "as", 742 "any", 743 "only", 744 "deferrable", 745 "null_p", 746 "current_time", 747 "true_p", 748 "table", 749 "case", 750 "trailing", 751 "variadic", 752 "for", 753 "on", 754 "distinct", 755 "false_p", 756 "not", 757 "constraint", 758 "current_timestamp", 759 "returning", 760 "primary", 761 "intersect", 762 "having", 763 "analyze", 764 "current_user", 765 "and", 766 "cast", 767 "symmetric", 768 "using", 769 "order", 770 "current_catalog", 771 } 772 773 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 774 775 # DuckDB doesn't generally support CREATE TABLE .. properties 776 # https://duckdb.org/docs/sql/statements/create_table.html 777 PROPERTIES_LOCATION = { 778 prop: exp.Properties.Location.UNSUPPORTED 779 for prop in generator.Generator.PROPERTIES_LOCATION 780 } 781 782 # There are a few exceptions (e.g. 
temporary tables) which are supported or 783 # can be transpiled to DuckDB, so we explicitly override them accordingly 784 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 785 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 786 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 787 788 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 789 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 790 791 def strtotime_sql(self, expression: exp.StrToTime) -> str: 792 if expression.args.get("safe"): 793 formatted_time = self.format_time(expression) 794 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 795 return str_to_time_sql(self, expression) 796 797 def strtodate_sql(self, expression: exp.StrToDate) -> str: 798 if expression.args.get("safe"): 799 formatted_time = self.format_time(expression) 800 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 801 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 802 803 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 804 arg = expression.this 805 if expression.args.get("safe"): 806 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 807 return self.func("JSON", arg) 808 809 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 810 nano = expression.args.get("nano") 811 if nano is not None: 812 expression.set( 813 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 814 ) 815 816 return rename_func("MAKE_TIME")(self, expression) 817 818 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 819 sec = expression.args["sec"] 820 821 milli = expression.args.get("milli") 822 if milli is not None: 823 sec += milli.pop() / exp.Literal.number(1000.0) 824 825 nano = expression.args.get("nano") 826 if nano is not None: 827 sec += nano.pop() / exp.Literal.number(1000000000.0) 828 829 if milli or nano: 830 expression.set("sec", sec) 831 832 return rename_func("MAKE_TIMESTAMP")(self, expression) 833 834 def tablesample_sql( 835 self, 836 expression: exp.TableSample, 837 tablesample_keyword: t.Optional[str] = None, 838 ) -> str: 839 if not isinstance(expression.parent, exp.Select): 840 # This sample clause only applies to a single source, not the entire resulting relation 841 tablesample_keyword = "TABLESAMPLE" 842 843 if expression.args.get("size"): 844 method = expression.args.get("method") 845 if method and method.name.upper() != "RESERVOIR": 846 self.unsupported( 847 f"Sampling method {method} is not supported with a discrete sample count, " 848 "defaulting to reservoir sampling" 849 ) 850 expression.set("method", exp.var("RESERVOIR")) 851 852 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 853 854 def interval_sql(self, expression: exp.Interval) -> str: 855 multiplier: t.Optional[int] = None 856 unit = expression.text("unit").lower() 857 858 if unit.startswith("week"): 859 multiplier = 7 860 if unit.startswith("quarter"): 861 multiplier = 90 862 863 if multiplier: 864 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 865 866 return super().interval_sql(expression) 867 868 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 869 if isinstance(expression.parent, exp.UserDefinedFunction): 870 return self.sql(expression, "this") 871 
return super().columndef_sql(expression, sep) 872 873 def join_sql(self, expression: exp.Join) -> str: 874 if ( 875 expression.side == "LEFT" 876 and not expression.args.get("on") 877 and isinstance(expression.this, exp.Unnest) 878 ): 879 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 880 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 881 return super().join_sql(expression.on(exp.true())) 882 883 return super().join_sql(expression) 884 885 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 886 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 887 if expression.args.get("is_end_exclusive"): 888 return rename_func("RANGE")(self, expression) 889 890 return self.function_fallback_sql(expression) 891 892 def bracket_sql(self, expression: exp.Bracket) -> str: 893 this = expression.this 894 if isinstance(this, exp.Array): 895 this.replace(exp.paren(this)) 896 897 bracket = super().bracket_sql(expression) 898 899 if not expression.args.get("returns_list_for_maps"): 900 if not this.type: 901 from sqlglot.optimizer.annotate_types import annotate_types 902 903 this = annotate_types(this) 904 905 if this.is_type(exp.DataType.Type.MAP): 906 bracket = f"({bracket})[1]" 907 908 return bracket 909 910 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 911 expression_sql = self.sql(expression, "expression") 912 913 func = expression.this 914 if isinstance(func, exp.PERCENTILES): 915 # Make the order key the first arg and slide the fraction to the right 916 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 917 order_col = expression.find(exp.Ordered) 918 if order_col: 919 func.set("expression", func.this) 920 func.set("this", order_col.this) 921 922 this = self.sql(expression, "this").rstrip(")") 923 924 return f"{this}{expression_sql})" 925 926 def length_sql(self, expression: exp.Length) -> str: 927 arg = expression.this 928 929 # Dialects like BQ and Snowflake also accept binary values as args, so 930 # DDB will attempt to infer the type or resort to case/when resolution 931 if not expression.args.get("binary") or arg.is_string: 932 return self.func("LENGTH", arg) 933 934 if not arg.type: 935 from sqlglot.optimizer.annotate_types import annotate_types 936 937 arg = annotate_types(arg) 938 939 if arg.is_type(*exp.DataType.TEXT_TYPES): 940 return self.func("LENGTH", arg) 941 942 # We need these casts to make duckdb's static type checker happy 943 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 944 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 945 946 case = ( 947 exp.case(self.func("TYPEOF", arg)) 948 .when( 949 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 950 ) # anonymous to break length_sql recursion 951 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 952 ) 953 954 return self.sql(case) 955 956 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 957 this = expression.this 958 key = expression.args.get("key") 959 key_sql = key.name if isinstance(key, exp.Expression) else "" 960 value_sql = self.sql(expression, "value") 961 962 kv_sql = f"{key_sql} := {value_sql}" 963 964 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 965 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 966 if isinstance(this, exp.Struct) and not this.expressions: 967 return self.func("STRUCT_PACK", kv_sql) 968 969 return self.func("STRUCT_INSERT", this, kv_sql) 970 971 def unnest_sql(self, expression: exp.Unnest) -> str: 972 explode_array = expression.args.get("explode_array") 973 if explode_array: 974 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 975 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 976 expression.expressions.append( 977 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 978 ) 979 980 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 981 alias = expression.args.get("alias") 982 if alias: 983 expression.set("alias", None) 984 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 985 986 unnest_sql = super().unnest_sql(expression) 987 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 988 return self.sql(select) 989 990 return super().unnest_sql(expression) 991 992 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 993 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 994 # DuckDB should render IGNORE NULLS only for the general-purpose 995 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 996 return super().ignorenulls_sql(expression) 997 998 return self.sql(expression, "this") 999 1000 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1001 this = self.sql(expression, "this") 1002 null_text = self.sql(expression, "null") 1003 1004 if null_text: 1005 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1006 1007 return self.func("ARRAY_TO_STRING", this, expression.expression) 1008 1009 @unsupported_args("position", "occurrence") 1010 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1011 group = expression.args.get("group") 1012 params = expression.args.get("parameters") 1013 1014 # Do not render group if there is no following argument, 1015 # and it's the default value for this dialect 1016 if ( 1017 not params 1018 and group 1019 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1020 ): 1021 group = None 1022 return self.func( 1023 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1024 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
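A small end-to-end sketch of one generator setting above: STAR_EXCEPT = "EXCLUDE" means BigQuery's SELECT * EXCEPT renders with DuckDB's EXCLUDE keyword (expected output shown as a comment; may vary by version).

import sqlglot

print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb"))
# expected roughly: ['SELECT * EXCLUDE (a) FROM t']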
797 def strtodate_sql(self, expression: exp.StrToDate) -> str: 798 if expression.args.get("safe"): 799 formatted_time = self.format_time(expression) 800 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 801 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
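A sketch of strtodate_sql in action (hedged; exact output depends on the sqlglot version):

import sqlglot

# MySQL's STR_TO_DATE parses to exp.StrToDate, which DuckDB renders by casting
# a STRPTIME call to DATE, per strtodate_sql above.
print(sqlglot.transpile("SELECT STR_TO_DATE(s, '%Y-%m-%d')", read="mysql", write="duckdb"))
# expected roughly: SELECT CAST(STRPTIME(s, '%Y-%m-%d') AS DATE)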
809 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 810 nano = expression.args.get("nano") 811 if nano is not None: 812 expression.set( 813 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 814 ) 815 816 return rename_func("MAKE_TIME")(self, expression)
818 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 819 sec = expression.args["sec"] 820 821 milli = expression.args.get("milli") 822 if milli is not None: 823 sec += milli.pop() / exp.Literal.number(1000.0) 824 825 nano = expression.args.get("nano") 826 if nano is not None: 827 sec += nano.pop() / exp.Literal.number(1000000000.0) 828 829 if milli or nano: 830 expression.set("sec", sec) 831 832 return rename_func("MAKE_TIMESTAMP")(self, expression)
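A sketch for timestampfromparts_sql (hedged; Snowflake's TIMESTAMP_FROM_PARTS is assumed to parse to exp.TimestampFromParts):

import sqlglot

# Milli/nano parts, when present, are folded into the seconds argument before
# the expression is rendered as DuckDB's MAKE_TIMESTAMP.
print(sqlglot.transpile(
    "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5)",
    read="snowflake",
    write="duckdb",
))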
834 def tablesample_sql( 835 self, 836 expression: exp.TableSample, 837 tablesample_keyword: t.Optional[str] = None, 838 ) -> str: 839 if not isinstance(expression.parent, exp.Select): 840 # This sample clause only applies to a single source, not the entire resulting relation 841 tablesample_keyword = "TABLESAMPLE" 842 843 if expression.args.get("size"): 844 method = expression.args.get("method") 845 if method and method.name.upper() != "RESERVOIR": 846 self.unsupported( 847 f"Sampling method {method} is not supported with a discrete sample count, " 848 "defaulting to reservoir sampling" 849 ) 850 expression.set("method", exp.var("RESERVOIR")) 851 852 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
854 def interval_sql(self, expression: exp.Interval) -> str: 855 multiplier: t.Optional[int] = None 856 unit = expression.text("unit").lower() 857 858 if unit.startswith("week"): 859 multiplier = 7 860 if unit.startswith("quarter"): 861 multiplier = 90 862 863 if multiplier: 864 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 865 866 return super().interval_sql(expression)
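A sketch for interval_sql (hedged; exact parenthesization may vary): WEEK and QUARTER interval units are rewritten as day multiples.

import sqlglot

print(sqlglot.transpile(
    "SELECT DATE_ADD(CURRENT_DATE, INTERVAL 2 WEEK)",
    read="bigquery",
    write="duckdb",
))
# expected roughly: SELECT CURRENT_DATE + (7 * INTERVAL '2' DAY)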
873 def join_sql(self, expression: exp.Join) -> str: 874 if ( 875 expression.side == "LEFT" 876 and not expression.args.get("on") 877 and isinstance(expression.this, exp.Unnest) 878 ): 879 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 880 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 881 return super().join_sql(expression.on(exp.true())) 882 883 return super().join_sql(expression)
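A sketch for join_sql (hedged; the surrounding UNNEST rewrite can change the exact output shape): an ON-less LEFT JOIN UNNEST gains a dummy always-true ON clause.

import sqlglot

print(sqlglot.transpile(
    "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x",
    read="bigquery",
    write="duckdb",
))
# the output is expected to contain "ON TRUE"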
892 def bracket_sql(self, expression: exp.Bracket) -> str: 893 this = expression.this 894 if isinstance(this, exp.Array): 895 this.replace(exp.paren(this)) 896 897 bracket = super().bracket_sql(expression) 898 899 if not expression.args.get("returns_list_for_maps"): 900 if not this.type: 901 from sqlglot.optimizer.annotate_types import annotate_types 902 903 this = annotate_types(this) 904 905 if this.is_type(exp.DataType.Type.MAP): 906 bracket = f"({bracket})[1]" 907 908 return bracket
910 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 911 expression_sql = self.sql(expression, "expression") 912 913 func = expression.this 914 if isinstance(func, exp.PERCENTILES): 915 # Make the order key the first arg and slide the fraction to the right 916 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 917 order_col = expression.find(exp.Ordered) 918 if order_col: 919 func.set("expression", func.this) 920 func.set("this", order_col.this) 921 922 this = self.sql(expression, "this").rstrip(")") 923 924 return f"{this}{expression_sql})"
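A sketch for withingroup_sql (hedged): the ORDER BY key becomes the first argument of DuckDB's ordered-set aggregate, with the fraction slid to the right.

import sqlglot

print(sqlglot.transpile(
    "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t",
    read="postgres",
    write="duckdb",
))
# expected roughly: SELECT QUANTILE_CONT(x, 0.5) FROM t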
926 def length_sql(self, expression: exp.Length) -> str: 927 arg = expression.this 928 929 # Dialects like BQ and Snowflake also accept binary values as args, so 930 # DDB will attempt to infer the type or resort to case/when resolution 931 if not expression.args.get("binary") or arg.is_string: 932 return self.func("LENGTH", arg) 933 934 if not arg.type: 935 from sqlglot.optimizer.annotate_types import annotate_types 936 937 arg = annotate_types(arg) 938 939 if arg.is_type(*exp.DataType.TEXT_TYPES): 940 return self.func("LENGTH", arg) 941 942 # We need these casts to make duckdb's static type checker happy 943 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 944 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 945 946 case = ( 947 exp.case(self.func("TYPEOF", arg)) 948 .when( 949 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 950 ) # anonymous to break length_sql recursion 951 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 952 ) 953 954 return self.sql(case)
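A sketch for length_sql (hedged; assumes the reading dialect marks LENGTH as accepting binary values, which triggers the TYPEOF-based CASE fallback when the argument type is unknown):

import sqlglot

print(sqlglot.transpile("SELECT LENGTH(c)", read="snowflake", write="duckdb"))
# expected roughly: a CASE TYPEOF(c) WHEN 'VARCHAR' ... WHEN 'BLOB' ... expression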
956 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 957 this = expression.this 958 key = expression.args.get("key") 959 key_sql = key.name if isinstance(key, exp.Expression) else "" 960 value_sql = self.sql(expression, "value") 961 962 kv_sql = f"{key_sql} := {value_sql}" 963 964 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 965 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 966 if isinstance(this, exp.Struct) and not this.expressions: 967 return self.func("STRUCT_PACK", kv_sql) 968 969 return self.func("STRUCT_INSERT", this, kv_sql)
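A sketch for objectinsert_sql, mirroring the comment in the source (hedged):

import sqlglot

# Inserting into an empty Snowflake OBJECT becomes STRUCT_PACK, since
# STRUCT_INSERT({}, key := value) is not valid DuckDB.
print(sqlglot.transpile(
    "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
    read="snowflake",
    write="duckdb",
))
# expected roughly: SELECT STRUCT_PACK(k := 1)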
971 def unnest_sql(self, expression: exp.Unnest) -> str: 972 explode_array = expression.args.get("explode_array") 973 if explode_array: 974 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 975 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 976 expression.expressions.append( 977 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 978 ) 979 980 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 981 alias = expression.args.get("alias") 982 if alias: 983 expression.set("alias", None) 984 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 985 986 unnest_sql = super().unnest_sql(expression) 987 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 988 return self.sql(select) 989 990 return super().unnest_sql(expression)
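A sketch for unnest_sql (hedged; whether explode_array is set depends on how the reading dialect parses FROM UNNEST):

import sqlglot

# A BigQuery FROM UNNEST over an array of structs is expected to become a
# subquery of the form (SELECT UNNEST(..., max_depth => 2)).
print(sqlglot.transpile(
    "SELECT * FROM UNNEST([STRUCT(1 AS a)])",
    read="bigquery",
    write="duckdb",
))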
992 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 993 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 994 # DuckDB should render IGNORE NULLS only for the general-purpose 995 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 996 return super().ignorenulls_sql(expression) 997 998 return self.sql(expression, "this")
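A sketch for ignorenulls_sql (hedged): IGNORE NULLS is preserved only for the window functions listed in WINDOW_FUNCS_WITH_IGNORE_NULLS and silently dropped elsewhere.

import sqlglot

print(sqlglot.transpile(
    "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
    read="bigquery",
    write="duckdb",
))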
1000 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1001 this = self.sql(expression, "this") 1002 null_text = self.sql(expression, "null") 1003 1004 if null_text: 1005 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1006 1007 return self.func("ARRAY_TO_STRING", this, expression.expression)
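A sketch for arraytostring_sql (hedged): a null-replacement argument is emulated with LIST_TRANSFORM + COALESCE before ARRAY_TO_STRING is emitted.

import sqlglot

print(sqlglot.transpile(
    "SELECT ARRAY_TO_STRING(arr, ',', 'NULL')",
    read="bigquery",
    write="duckdb",
))
# expected roughly: SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'NULL')), ',')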
1009 @unsupported_args("position", "occurrence") 1010 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1011 group = expression.args.get("group") 1012 params = expression.args.get("parameters") 1013 1014 # Do not render group if there is no following argument, 1015 # and it's the default value for this dialect 1016 if ( 1017 not params 1018 and group 1019 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1020 ): 1021 group = None 1022 return self.func( 1023 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1024 )
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql