sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)
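

# Illustrative sketch (not part of the module): what the _date_delta_sql transform
# above produces. The helper name is hypothetical; only the public sqlglot API is
# assumed, and the expected output is approximate.
def _example_date_delta() -> None:
    import sqlglot

    # A string literal operand is cast to DATE so that `+ INTERVAL` type-checks in DuckDB.
    sql = sqlglot.transpile(
        "SELECT DATE_ADD('2020-01-01', INTERVAL 1 DAY)", read="bigquery", write="duckdb"
    )[0]
    print(sql)  # roughly: SELECT CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY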


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))
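

# Illustrative sketch (not part of the module): _unix_to_time_sql above dispatches on
# the epoch scale. The helper name is hypothetical; only the public sqlglot API is
# assumed, and the expected outputs are approximate.
def _example_unix_to_time() -> None:
    from sqlglot import exp

    millis = exp.UnixToTime(this=exp.Literal.number(1618088028295), scale=exp.UnixToTime.MILLIS)
    print(millis.sql(dialect="duckdb"))  # roughly: EPOCH_MS(1618088028295)

    micros = exp.UnixToTime(this=exp.Literal.number(1618088028295000), scale=exp.UnixToTime.MICROS)
    print(micros.sql(dialect="duckdb"))  # roughly: MAKE_TIMESTAMP(1618088028295000)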
self.func("EPOCH_MS", timestamp) 213 if scale == exp.UnixToTime.MICROS: 214 return self.func("MAKE_TIMESTAMP", timestamp) 215 216 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 217 218 219WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 220 221 222def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 223 arrow_sql = arrow_json_extract_sql(self, expression) 224 if not expression.same_parent and isinstance( 225 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 226 ): 227 arrow_sql = self.wrap(arrow_sql) 228 return arrow_sql 229 230 231def _implicit_datetime_cast( 232 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 233) -> t.Optional[exp.Expression]: 234 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 235 236 237def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 238 this = _implicit_datetime_cast(expression.this) 239 expr = _implicit_datetime_cast(expression.expression) 240 241 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 242 243 244def _generate_datetime_array_sql( 245 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 246) -> str: 247 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 248 249 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 250 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 251 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 252 253 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 254 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 255 start=start, end=end, step=expression.args.get("step") 256 ) 257 258 if is_generate_date_array: 259 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 260 # GENERATE_DATE_ARRAY we must cast it back to DATE array 261 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 262 263 return self.sql(gen_series) 264 265 266def _json_extract_value_array_sql( 267 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 268) -> str: 269 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 270 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 271 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 272 273 274class DuckDB(Dialect): 275 NULL_ORDERING = "nulls_are_last" 276 SUPPORTS_USER_DEFINED_TYPES = False 277 SAFE_DIVISION = True 278 INDEX_OFFSET = 1 279 CONCAT_COALESCE = True 280 SUPPORTS_ORDER_BY_ALL = True 281 SUPPORTS_FIXED_SIZE_ARRAYS = True 282 STRICT_JSON_PATH_SYNTAX = False 283 284 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 285 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 286 287 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 288 if isinstance(path, exp.Literal): 289 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 290 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 291 # This check ensures we'll avoid trying to parse these as JSON paths, which can 292 # either result in a noisy warning or in an invalid representation of the path. 


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }
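
    # Illustrative sketch (not part of the source): a few consequences of the Tokenizer
    # mappings above, shown against the public sqlglot API (outputs omitted).
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT CAST(x AS STRING)", read="duckdb")  # STRING tokenizes as TEXT
    #   sqlglot.parse_one("SELECT 2 ** 3", read="duckdb")             # ** -> DSTAR, parsed as exp.Pow below
    #   sqlglot.parse_one("SELECT $tag$hello$tag$", read="duckdb")    # $...$ heredoc-style strings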
"REGEXP_REPLACE": lambda args: exp.RegexpReplace( 399 this=seq_get(args, 0), 400 expression=seq_get(args, 1), 401 replacement=seq_get(args, 2), 402 modifiers=seq_get(args, 3), 403 ), 404 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 405 "STRING_SPLIT": exp.Split.from_arg_list, 406 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 407 "STRING_TO_ARRAY": exp.Split.from_arg_list, 408 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 409 "STRUCT_PACK": exp.Struct.from_arg_list, 410 "STR_SPLIT": exp.Split.from_arg_list, 411 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 412 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 413 "UNNEST": exp.Explode.from_arg_list, 414 "XOR": binary_from_function(exp.BitwiseXor), 415 "GENERATE_SERIES": _build_generate_series(), 416 "RANGE": _build_generate_series(end_exclusive=True), 417 "EDITDIST3": exp.Levenshtein.from_arg_list, 418 } 419 420 FUNCTIONS.pop("DATE_SUB") 421 FUNCTIONS.pop("GLOB") 422 423 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 424 FUNCTION_PARSERS.pop("DECODE") 425 426 NO_PAREN_FUNCTION_PARSERS = { 427 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 428 "MAP": lambda self: self._parse_map(), 429 } 430 431 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 432 TokenType.SEMI, 433 TokenType.ANTI, 434 } 435 436 PLACEHOLDER_PARSERS = { 437 **parser.Parser.PLACEHOLDER_PARSERS, 438 TokenType.PARAMETER: lambda self: ( 439 self.expression(exp.Placeholder, this=self._prev.text) 440 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 441 else None 442 ), 443 } 444 445 TYPE_CONVERTERS = { 446 # https://duckdb.org/docs/sql/data_types/numeric 447 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 448 # https://duckdb.org/docs/sql/data_types/text 449 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 450 } 451 452 STATEMENT_PARSERS = { 453 **parser.Parser.STATEMENT_PARSERS, 454 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 455 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 456 } 457 458 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 459 # https://duckdb.org/docs/sql/samples.html 460 sample = super()._parse_table_sample(as_modifier=as_modifier) 461 if sample and not sample.args.get("method"): 462 if sample.args.get("size"): 463 sample.set("method", exp.var("RESERVOIR")) 464 else: 465 sample.set("method", exp.var("SYSTEM")) 466 467 return sample 468 469 def _parse_bracket( 470 self, this: t.Optional[exp.Expression] = None 471 ) -> t.Optional[exp.Expression]: 472 bracket = super()._parse_bracket(this) 473 if isinstance(bracket, exp.Bracket): 474 bracket.set("returns_list_for_maps", True) 475 476 return bracket 477 478 def _parse_map(self) -> exp.ToMap | exp.Map: 479 if self._match(TokenType.L_BRACE, advance=False): 480 return self.expression(exp.ToMap, this=self._parse_bracket()) 481 482 args = self._parse_wrapped_csv(self._parse_assignment) 483 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 484 485 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 486 return self._parse_field_def() 487 488 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 489 if len(aggregations) == 1: 490 return super()._pivot_column_names(aggregations) 491 return pivot_column_names(aggregations, dialect="duckdb") 492 493 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 494 def 

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
        }
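
        # Illustrative sketch (not part of the source): two of the transforms above in
        # action, shown against the public sqlglot API (outputs are approximate).
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT STRUCT('s' AS a)", read="bigquery", write="duckdb")
        #   # _struct_sql -> roughly: SELECT {'a': 's'}
        #   sqlglot.transpile("SELECT EXPLODE(col)", read="spark", write="duckdb")
        #   # exp.Explode -> roughly: SELECT UNNEST(col)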

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
rename_func("LEVENSHTEIN") 663 ), 664 } 665 666 SUPPORTED_JSON_PATH_PARTS = { 667 exp.JSONPathKey, 668 exp.JSONPathRoot, 669 exp.JSONPathSubscript, 670 exp.JSONPathWildcard, 671 } 672 673 TYPE_MAPPING = { 674 **generator.Generator.TYPE_MAPPING, 675 exp.DataType.Type.BINARY: "BLOB", 676 exp.DataType.Type.BPCHAR: "TEXT", 677 exp.DataType.Type.CHAR: "TEXT", 678 exp.DataType.Type.DATETIME: "TIMESTAMP", 679 exp.DataType.Type.FLOAT: "REAL", 680 exp.DataType.Type.NCHAR: "TEXT", 681 exp.DataType.Type.NVARCHAR: "TEXT", 682 exp.DataType.Type.UINT: "UINTEGER", 683 exp.DataType.Type.VARBINARY: "BLOB", 684 exp.DataType.Type.ROWVERSION: "BLOB", 685 exp.DataType.Type.VARCHAR: "TEXT", 686 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 687 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 688 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 689 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 690 } 691 692 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 693 RESERVED_KEYWORDS = { 694 "array", 695 "analyse", 696 "union", 697 "all", 698 "when", 699 "in_p", 700 "default", 701 "create_p", 702 "window", 703 "asymmetric", 704 "to", 705 "else", 706 "localtime", 707 "from", 708 "end_p", 709 "select", 710 "current_date", 711 "foreign", 712 "with", 713 "grant", 714 "session_user", 715 "or", 716 "except", 717 "references", 718 "fetch", 719 "limit", 720 "group_p", 721 "leading", 722 "into", 723 "collate", 724 "offset", 725 "do", 726 "then", 727 "localtimestamp", 728 "check_p", 729 "lateral_p", 730 "current_role", 731 "where", 732 "asc_p", 733 "placing", 734 "desc_p", 735 "user", 736 "unique", 737 "initially", 738 "column", 739 "both", 740 "some", 741 "as", 742 "any", 743 "only", 744 "deferrable", 745 "null_p", 746 "current_time", 747 "true_p", 748 "table", 749 "case", 750 "trailing", 751 "variadic", 752 "for", 753 "on", 754 "distinct", 755 "false_p", 756 "not", 757 "constraint", 758 "current_timestamp", 759 "returning", 760 "primary", 761 "intersect", 762 "having", 763 "analyze", 764 "current_user", 765 "and", 766 "cast", 767 "symmetric", 768 "using", 769 "order", 770 "current_catalog", 771 } 772 773 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 774 775 # DuckDB doesn't generally support CREATE TABLE .. properties 776 # https://duckdb.org/docs/sql/statements/create_table.html 777 PROPERTIES_LOCATION = { 778 prop: exp.Properties.Location.UNSUPPORTED 779 for prop in generator.Generator.PROPERTIES_LOCATION 780 } 781 782 # There are a few exceptions (e.g. 

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)
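
        # Illustrative sketch (not part of the source): interval_sql above rewrites week
        # and quarter intervals into day arithmetic. The expected output is approximate:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT x + INTERVAL 2 WEEK", read="duckdb", write="duckdb")
        #   # roughly: SELECT x + (7 * INTERVAL 2 DAY)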

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)
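

# Illustrative sketch (not part of the module): typical entry points into this dialect,
# using only the public sqlglot API. The helper name is hypothetical.
def _example_duckdb_round_trip() -> None:
    import sqlglot

    # DuckDB-specific functions parse into canonical sqlglot expressions
    # (EPOCH_MS -> exp.UnixToTime with millisecond scale, per Parser.FUNCTIONS above)...
    expression = sqlglot.parse_one("SELECT EPOCH_MS(1618088028295)", read="duckdb")
    print(type(expression))  # an exp.Select tree

    # ...which other dialects can then render with their own syntax.
    print(expression.sql(dialect="spark"))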
275class DuckDB(Dialect): 276 NULL_ORDERING = "nulls_are_last" 277 SUPPORTS_USER_DEFINED_TYPES = False 278 SAFE_DIVISION = True 279 INDEX_OFFSET = 1 280 CONCAT_COALESCE = True 281 SUPPORTS_ORDER_BY_ALL = True 282 SUPPORTS_FIXED_SIZE_ARRAYS = True 283 STRICT_JSON_PATH_SYNTAX = False 284 285 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 286 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 287 288 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 289 if isinstance(path, exp.Literal): 290 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 291 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 292 # This check ensures we'll avoid trying to parse these as JSON paths, which can 293 # either result in a noisy warning or in an invalid representation of the path. 294 path_text = path.name 295 if path_text.startswith("/") or "[#" in path_text: 296 return path 297 298 return super().to_json_path(path) 299 300 class Tokenizer(tokens.Tokenizer): 301 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 302 HEREDOC_STRINGS = ["$"] 303 304 HEREDOC_TAG_IS_IDENTIFIER = True 305 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 306 307 KEYWORDS = { 308 **tokens.Tokenizer.KEYWORDS, 309 "//": TokenType.DIV, 310 "**": TokenType.DSTAR, 311 "^@": TokenType.CARET_AT, 312 "@>": TokenType.AT_GT, 313 "<@": TokenType.LT_AT, 314 "ATTACH": TokenType.ATTACH, 315 "BINARY": TokenType.VARBINARY, 316 "BITSTRING": TokenType.BIT, 317 "BPCHAR": TokenType.TEXT, 318 "CHAR": TokenType.TEXT, 319 "CHARACTER VARYING": TokenType.TEXT, 320 "DETACH": TokenType.DETACH, 321 "EXCLUDE": TokenType.EXCEPT, 322 "LOGICAL": TokenType.BOOLEAN, 323 "ONLY": TokenType.ONLY, 324 "PIVOT_WIDER": TokenType.PIVOT, 325 "POSITIONAL": TokenType.POSITIONAL, 326 "SIGNED": TokenType.INT, 327 "STRING": TokenType.TEXT, 328 "SUMMARIZE": TokenType.SUMMARIZE, 329 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 330 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 331 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 332 "TIMESTAMP_US": TokenType.TIMESTAMP, 333 "UBIGINT": TokenType.UBIGINT, 334 "UINTEGER": TokenType.UINT, 335 "USMALLINT": TokenType.USMALLINT, 336 "UTINYINT": TokenType.UTINYINT, 337 "VARCHAR": TokenType.TEXT, 338 } 339 KEYWORDS.pop("/*+") 340 341 SINGLE_TOKENS = { 342 **tokens.Tokenizer.SINGLE_TOKENS, 343 "$": TokenType.PARAMETER, 344 } 345 346 class Parser(parser.Parser): 347 BITWISE = { 348 **parser.Parser.BITWISE, 349 TokenType.TILDA: exp.RegexpLike, 350 } 351 BITWISE.pop(TokenType.CARET) 352 353 RANGE_PARSERS = { 354 **parser.Parser.RANGE_PARSERS, 355 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 356 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 357 } 358 359 EXPONENT = { 360 **parser.Parser.EXPONENT, 361 TokenType.CARET: exp.Pow, 362 TokenType.DSTAR: exp.Pow, 363 } 364 365 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 366 367 FUNCTIONS = { 368 **parser.Parser.FUNCTIONS, 369 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 370 "ARRAY_SORT": exp.SortArray.from_arg_list, 371 "DATEDIFF": _build_date_diff, 372 "DATE_DIFF": _build_date_diff, 373 "DATE_TRUNC": date_trunc_to_time, 374 "DATETRUNC": date_trunc_to_time, 375 "DECODE": lambda args: exp.Decode( 376 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 377 ), 378 "ENCODE": lambda args: exp.Encode( 379 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 380 ), 381 "EPOCH": exp.TimeToUnix.from_arg_list, 382 "EPOCH_MS": lambda 
args: exp.UnixToTime( 383 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 384 ), 385 "JSON": exp.ParseJSON.from_arg_list, 386 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 387 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 388 "LIST_HAS": exp.ArrayContains.from_arg_list, 389 "LIST_REVERSE_SORT": _build_sort_array_desc, 390 "LIST_SORT": exp.SortArray.from_arg_list, 391 "LIST_VALUE": lambda args: exp.Array(expressions=args), 392 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 393 "MAKE_TIMESTAMP": _build_make_timestamp, 394 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 395 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 396 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 397 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 398 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 399 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 400 this=seq_get(args, 0), 401 expression=seq_get(args, 1), 402 replacement=seq_get(args, 2), 403 modifiers=seq_get(args, 3), 404 ), 405 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 406 "STRING_SPLIT": exp.Split.from_arg_list, 407 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 408 "STRING_TO_ARRAY": exp.Split.from_arg_list, 409 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 410 "STRUCT_PACK": exp.Struct.from_arg_list, 411 "STR_SPLIT": exp.Split.from_arg_list, 412 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 413 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 414 "UNNEST": exp.Explode.from_arg_list, 415 "XOR": binary_from_function(exp.BitwiseXor), 416 "GENERATE_SERIES": _build_generate_series(), 417 "RANGE": _build_generate_series(end_exclusive=True), 418 "EDITDIST3": exp.Levenshtein.from_arg_list, 419 } 420 421 FUNCTIONS.pop("DATE_SUB") 422 FUNCTIONS.pop("GLOB") 423 424 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 425 FUNCTION_PARSERS.pop("DECODE") 426 427 NO_PAREN_FUNCTION_PARSERS = { 428 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 429 "MAP": lambda self: self._parse_map(), 430 } 431 432 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 433 TokenType.SEMI, 434 TokenType.ANTI, 435 } 436 437 PLACEHOLDER_PARSERS = { 438 **parser.Parser.PLACEHOLDER_PARSERS, 439 TokenType.PARAMETER: lambda self: ( 440 self.expression(exp.Placeholder, this=self._prev.text) 441 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 442 else None 443 ), 444 } 445 446 TYPE_CONVERTERS = { 447 # https://duckdb.org/docs/sql/data_types/numeric 448 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 449 # https://duckdb.org/docs/sql/data_types/text 450 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 451 } 452 453 STATEMENT_PARSERS = { 454 **parser.Parser.STATEMENT_PARSERS, 455 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 456 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 457 } 458 459 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 460 # https://duckdb.org/docs/sql/samples.html 461 sample = super()._parse_table_sample(as_modifier=as_modifier) 462 if sample and not sample.args.get("method"): 463 if sample.args.get("size"): 464 sample.set("method", exp.var("RESERVOIR")) 465 else: 466 sample.set("method", exp.var("SYSTEM")) 467 468 return sample 469 470 def _parse_bracket( 471 self, this: t.Optional[exp.Expression] = None 472 ) -> t.Optional[exp.Expression]: 473 bracket = super()._parse_bracket(this) 474 
if isinstance(bracket, exp.Bracket): 475 bracket.set("returns_list_for_maps", True) 476 477 return bracket 478 479 def _parse_map(self) -> exp.ToMap | exp.Map: 480 if self._match(TokenType.L_BRACE, advance=False): 481 return self.expression(exp.ToMap, this=self._parse_bracket()) 482 483 args = self._parse_wrapped_csv(self._parse_assignment) 484 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 485 486 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 487 return self._parse_field_def() 488 489 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 490 if len(aggregations) == 1: 491 return super()._pivot_column_names(aggregations) 492 return pivot_column_names(aggregations, dialect="duckdb") 493 494 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 495 def _parse_attach_option() -> exp.AttachOption: 496 return self.expression( 497 exp.AttachOption, 498 this=self._parse_var(any_token=True), 499 expression=self._parse_field(any_token=True), 500 ) 501 502 self._match(TokenType.DATABASE) 503 exists = self._parse_exists(not_=is_attach) 504 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 505 506 if self._match(TokenType.L_PAREN, advance=False): 507 expressions = self._parse_wrapped_csv(_parse_attach_option) 508 else: 509 expressions = None 510 511 return ( 512 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 513 if is_attach 514 else self.expression(exp.Detach, this=this, exists=exists) 515 ) 516 517 class Generator(generator.Generator): 518 PARAMETER_TOKEN = "$" 519 NAMED_PLACEHOLDER_TOKEN = "$" 520 JOIN_HINTS = False 521 TABLE_HINTS = False 522 QUERY_HINTS = False 523 LIMIT_FETCH = "LIMIT" 524 STRUCT_DELIMITER = ("(", ")") 525 RENAME_TABLE_WITH_DB = False 526 NVL2_SUPPORTED = False 527 SEMI_ANTI_JOIN_WITH_SIDE = False 528 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 529 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 530 LAST_DAY_SUPPORTS_DATE_PART = False 531 JSON_KEY_VALUE_PAIR_SEP = "," 532 IGNORE_NULLS_IN_FUNC = True 533 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 534 SUPPORTS_CREATE_TABLE_LIKE = False 535 MULTI_ARG_DISTINCT = False 536 CAN_IMPLEMENT_ARRAY_ANY = True 537 SUPPORTS_TO_NUMBER = False 538 COPY_HAS_INTO_KEYWORD = False 539 STAR_EXCEPT = "EXCLUDE" 540 PAD_FILL_PATTERN_IS_REQUIRED = True 541 ARRAY_CONCAT_IS_VAR_LEN = False 542 ARRAY_SIZE_DIM_REQUIRED = False 543 544 TRANSFORMS = { 545 **generator.Generator.TRANSFORMS, 546 exp.ApproxDistinct: approx_count_distinct_sql, 547 exp.Array: inline_array_unless_query, 548 exp.ArrayFilter: rename_func("LIST_FILTER"), 549 exp.ArraySort: _array_sort_sql, 550 exp.ArraySum: rename_func("LIST_SUM"), 551 exp.BitwiseXor: rename_func("XOR"), 552 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 553 exp.CurrentDate: lambda *_: "CURRENT_DATE", 554 exp.CurrentTime: lambda *_: "CURRENT_TIME", 555 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 556 exp.DayOfMonth: rename_func("DAYOFMONTH"), 557 exp.DayOfWeek: rename_func("DAYOFWEEK"), 558 exp.DayOfWeekIso: rename_func("ISODOW"), 559 exp.DayOfYear: rename_func("DAYOFYEAR"), 560 exp.DataType: _datatype_sql, 561 exp.Date: _date_sql, 562 exp.DateAdd: _date_delta_sql, 563 exp.DateFromParts: rename_func("MAKE_DATE"), 564 exp.DateSub: _date_delta_sql, 565 exp.DateDiff: _date_diff_sql, 566 exp.DateStrToDate: datestrtodate_sql, 567 exp.Datetime: no_datetime_sql, 568 exp.DatetimeSub: _date_delta_sql, 569 exp.DatetimeAdd: _date_delta_sql, 
570 exp.DateToDi: lambda self, 571 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 572 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 573 exp.DiToDate: lambda self, 574 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 575 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 576 exp.GenerateDateArray: _generate_datetime_array_sql, 577 exp.GenerateTimestampArray: _generate_datetime_array_sql, 578 exp.Explode: rename_func("UNNEST"), 579 exp.IntDiv: lambda self, e: self.binary(e, "//"), 580 exp.IsInf: rename_func("ISINF"), 581 exp.IsNan: rename_func("ISNAN"), 582 exp.JSONBExists: rename_func("JSON_EXISTS"), 583 exp.JSONExtract: _arrow_json_extract_sql, 584 exp.JSONExtractArray: _json_extract_value_array_sql, 585 exp.JSONExtractScalar: _arrow_json_extract_sql, 586 exp.JSONFormat: _json_format_sql, 587 exp.JSONValueArray: _json_extract_value_array_sql, 588 exp.Lateral: explode_to_unnest_sql, 589 exp.LogicalOr: rename_func("BOOL_OR"), 590 exp.LogicalAnd: rename_func("BOOL_AND"), 591 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 592 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 593 exp.MonthsBetween: lambda self, e: self.func( 594 "DATEDIFF", 595 "'month'", 596 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 597 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 598 ), 599 exp.PercentileCont: rename_func("QUANTILE_CONT"), 600 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 601 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 602 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 603 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 604 exp.RegexpReplace: lambda self, e: self.func( 605 "REGEXP_REPLACE", 606 e.this, 607 e.expression, 608 e.args.get("replacement"), 609 e.args.get("modifiers"), 610 ), 611 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 612 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 613 exp.Return: lambda self, e: self.sql(e, "this"), 614 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 615 exp.Rand: rename_func("RANDOM"), 616 exp.SafeDivide: no_safe_divide_sql, 617 exp.SHA: rename_func("SHA1"), 618 exp.SHA2: sha256_sql, 619 exp.Split: rename_func("STR_SPLIT"), 620 exp.SortArray: _sort_array_sql, 621 exp.StrPosition: str_position_sql, 622 exp.StrToUnix: lambda self, e: self.func( 623 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 624 ), 625 exp.Struct: _struct_sql, 626 exp.Transform: rename_func("LIST_TRANSFORM"), 627 exp.TimeAdd: _date_delta_sql, 628 exp.Time: no_time_sql, 629 exp.TimeDiff: _timediff_sql, 630 exp.Timestamp: no_timestamp_sql, 631 exp.TimestampDiff: lambda self, e: self.func( 632 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 633 ), 634 exp.TimestampTrunc: timestamptrunc_sql(), 635 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 636 exp.TimeStrToTime: timestrtotime_sql, 637 exp.TimeStrToUnix: lambda self, e: self.func( 638 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 639 ), 640 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 641 exp.TimeToUnix: rename_func("EPOCH"), 642 exp.TsOrDiToDi: lambda self, 643 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS 
INT)", 644 exp.TsOrDsAdd: _date_delta_sql, 645 exp.TsOrDsDiff: lambda self, e: self.func( 646 "DATE_DIFF", 647 f"'{e.args.get('unit') or 'DAY'}'", 648 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 649 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 650 ), 651 exp.UnixToStr: lambda self, e: self.func( 652 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 653 ), 654 exp.DatetimeTrunc: lambda self, e: self.func( 655 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 656 ), 657 exp.UnixToTime: _unix_to_time_sql, 658 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 659 exp.VariancePop: rename_func("VAR_POP"), 660 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 661 exp.Xor: bool_xor_sql, 662 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 663 rename_func("LEVENSHTEIN") 664 ), 665 } 666 667 SUPPORTED_JSON_PATH_PARTS = { 668 exp.JSONPathKey, 669 exp.JSONPathRoot, 670 exp.JSONPathSubscript, 671 exp.JSONPathWildcard, 672 } 673 674 TYPE_MAPPING = { 675 **generator.Generator.TYPE_MAPPING, 676 exp.DataType.Type.BINARY: "BLOB", 677 exp.DataType.Type.BPCHAR: "TEXT", 678 exp.DataType.Type.CHAR: "TEXT", 679 exp.DataType.Type.DATETIME: "TIMESTAMP", 680 exp.DataType.Type.FLOAT: "REAL", 681 exp.DataType.Type.NCHAR: "TEXT", 682 exp.DataType.Type.NVARCHAR: "TEXT", 683 exp.DataType.Type.UINT: "UINTEGER", 684 exp.DataType.Type.VARBINARY: "BLOB", 685 exp.DataType.Type.ROWVERSION: "BLOB", 686 exp.DataType.Type.VARCHAR: "TEXT", 687 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 688 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 689 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 690 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 691 } 692 693 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 694 RESERVED_KEYWORDS = { 695 "array", 696 "analyse", 697 "union", 698 "all", 699 "when", 700 "in_p", 701 "default", 702 "create_p", 703 "window", 704 "asymmetric", 705 "to", 706 "else", 707 "localtime", 708 "from", 709 "end_p", 710 "select", 711 "current_date", 712 "foreign", 713 "with", 714 "grant", 715 "session_user", 716 "or", 717 "except", 718 "references", 719 "fetch", 720 "limit", 721 "group_p", 722 "leading", 723 "into", 724 "collate", 725 "offset", 726 "do", 727 "then", 728 "localtimestamp", 729 "check_p", 730 "lateral_p", 731 "current_role", 732 "where", 733 "asc_p", 734 "placing", 735 "desc_p", 736 "user", 737 "unique", 738 "initially", 739 "column", 740 "both", 741 "some", 742 "as", 743 "any", 744 "only", 745 "deferrable", 746 "null_p", 747 "current_time", 748 "true_p", 749 "table", 750 "case", 751 "trailing", 752 "variadic", 753 "for", 754 "on", 755 "distinct", 756 "false_p", 757 "not", 758 "constraint", 759 "current_timestamp", 760 "returning", 761 "primary", 762 "intersect", 763 "having", 764 "analyze", 765 "current_user", 766 "and", 767 "cast", 768 "symmetric", 769 "using", 770 "order", 771 "current_catalog", 772 } 773 774 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 775 776 # DuckDB doesn't generally support CREATE TABLE .. properties 777 # https://duckdb.org/docs/sql/statements/create_table.html 778 PROPERTIES_LOCATION = { 779 prop: exp.Properties.Location.UNSUPPORTED 780 for prop in generator.Generator.PROPERTIES_LOCATION 781 } 782 783 # There are a few exceptions (e.g. 
temporary tables) which are supported or 784 # can be transpiled to DuckDB, so we explicitly override them accordingly 785 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 786 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 787 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 788 789 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 790 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 791 792 def strtotime_sql(self, expression: exp.StrToTime) -> str: 793 if expression.args.get("safe"): 794 formatted_time = self.format_time(expression) 795 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 796 return str_to_time_sql(self, expression) 797 798 def strtodate_sql(self, expression: exp.StrToDate) -> str: 799 if expression.args.get("safe"): 800 formatted_time = self.format_time(expression) 801 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 802 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 803 804 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 805 arg = expression.this 806 if expression.args.get("safe"): 807 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 808 return self.func("JSON", arg) 809 810 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 811 nano = expression.args.get("nano") 812 if nano is not None: 813 expression.set( 814 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 815 ) 816 817 return rename_func("MAKE_TIME")(self, expression) 818 819 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 820 sec = expression.args["sec"] 821 822 milli = expression.args.get("milli") 823 if milli is not None: 824 sec += milli.pop() / exp.Literal.number(1000.0) 825 826 nano = expression.args.get("nano") 827 if nano is not None: 828 sec += nano.pop() / exp.Literal.number(1000000000.0) 829 830 if milli or nano: 831 expression.set("sec", sec) 832 833 return rename_func("MAKE_TIMESTAMP")(self, expression) 834 835 def tablesample_sql( 836 self, 837 expression: exp.TableSample, 838 tablesample_keyword: t.Optional[str] = None, 839 ) -> str: 840 if not isinstance(expression.parent, exp.Select): 841 # This sample clause only applies to a single source, not the entire resulting relation 842 tablesample_keyword = "TABLESAMPLE" 843 844 if expression.args.get("size"): 845 method = expression.args.get("method") 846 if method and method.name.upper() != "RESERVOIR": 847 self.unsupported( 848 f"Sampling method {method} is not supported with a discrete sample count, " 849 "defaulting to reservoir sampling" 850 ) 851 expression.set("method", exp.var("RESERVOIR")) 852 853 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 854 855 def interval_sql(self, expression: exp.Interval) -> str: 856 multiplier: t.Optional[int] = None 857 unit = expression.text("unit").lower() 858 859 if unit.startswith("week"): 860 multiplier = 7 861 if unit.startswith("quarter"): 862 multiplier = 90 863 864 if multiplier: 865 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 866 867 return super().interval_sql(expression) 868 869 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 870 if isinstance(expression.parent, exp.UserDefinedFunction): 871 return self.sql(expression, "this") 872 
return super().columndef_sql(expression, sep) 873 874 def join_sql(self, expression: exp.Join) -> str: 875 if ( 876 expression.side == "LEFT" 877 and not expression.args.get("on") 878 and isinstance(expression.this, exp.Unnest) 879 ): 880 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 881 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 882 return super().join_sql(expression.on(exp.true())) 883 884 return super().join_sql(expression) 885 886 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 887 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 888 if expression.args.get("is_end_exclusive"): 889 return rename_func("RANGE")(self, expression) 890 891 return self.function_fallback_sql(expression) 892 893 def bracket_sql(self, expression: exp.Bracket) -> str: 894 this = expression.this 895 if isinstance(this, exp.Array): 896 this.replace(exp.paren(this)) 897 898 bracket = super().bracket_sql(expression) 899 900 if not expression.args.get("returns_list_for_maps"): 901 if not this.type: 902 from sqlglot.optimizer.annotate_types import annotate_types 903 904 this = annotate_types(this) 905 906 if this.is_type(exp.DataType.Type.MAP): 907 bracket = f"({bracket})[1]" 908 909 return bracket 910 911 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 912 expression_sql = self.sql(expression, "expression") 913 914 func = expression.this 915 if isinstance(func, exp.PERCENTILES): 916 # Make the order key the first arg and slide the fraction to the right 917 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 918 order_col = expression.find(exp.Ordered) 919 if order_col: 920 func.set("expression", func.this) 921 func.set("this", order_col.this) 922 923 this = self.sql(expression, "this").rstrip(")") 924 925 return f"{this}{expression_sql})" 926 927 def length_sql(self, expression: exp.Length) -> str: 928 arg = expression.this 929 930 # Dialects like BQ and Snowflake also accept binary values as args, so 931 # DDB will attempt to infer the type or resort to case/when resolution 932 if not expression.args.get("binary") or arg.is_string: 933 return self.func("LENGTH", arg) 934 935 if not arg.type: 936 from sqlglot.optimizer.annotate_types import annotate_types 937 938 arg = annotate_types(arg) 939 940 if arg.is_type(*exp.DataType.TEXT_TYPES): 941 return self.func("LENGTH", arg) 942 943 # We need these casts to make duckdb's static type checker happy 944 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 945 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 946 947 case = ( 948 exp.case(self.func("TYPEOF", arg)) 949 .when( 950 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 951 ) # anonymous to break length_sql recursion 952 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 953 ) 954 955 return self.sql(case) 956 957 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 958 this = expression.this 959 key = expression.args.get("key") 960 key_sql = key.name if isinstance(key, exp.Expression) else "" 961 value_sql = self.sql(expression, "value") 962 963 kv_sql = f"{key_sql} := {value_sql}" 964 965 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 966 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 967 if isinstance(this, exp.Struct) and not this.expressions: 968 return self.func("STRUCT_PACK", kv_sql) 969 970 return self.func("STRUCT_INSERT", this, kv_sql) 971 972 def unnest_sql(self, expression: exp.Unnest) -> str: 973 explode_array = expression.args.get("explode_array") 974 if explode_array: 975 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 976 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 977 expression.expressions.append( 978 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 979 ) 980 981 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 982 alias = expression.args.get("alias") 983 if alias: 984 expression.set("alias", None) 985 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 986 987 unnest_sql = super().unnest_sql(expression) 988 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 989 return self.sql(select) 990 991 return super().unnest_sql(expression) 992 993 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 994 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 995 # DuckDB should render IGNORE NULLS only for the general-purpose 996 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 997 return super().ignorenulls_sql(expression) 998 999 return self.sql(expression, "this") 1000 1001 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1002 this = self.sql(expression, "this") 1003 null_text = self.sql(expression, "null") 1004 1005 if null_text: 1006 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1007 1008 return self.func("ARRAY_TO_STRING", this, expression.expression) 1009 1010 @unsupported_args("position", "occurrence") 1011 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1012 group = expression.args.get("group") 1013 params = expression.args.get("parameters") 1014 1015 # Do not render group if there is no following argument, 1016 # and it's the default value for this dialect 1017 if ( 1018 not params 1019 and group 1020 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1021 ): 1022 group = None 1023 return self.func( 1024 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1025 ) 1026 1027 @unsupported_args("culture") 1028 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1029 fmt = expression.args.get("format") 1030 if fmt and fmt.is_int: 1031 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1032 1033 self.unsupported("Only integer formats are supported by NumberToStr") 1034 return self.function_fallback_sql(expression)
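The end-exclusivity comment in generateseries_sql above is easy to confirm with a round-trip through the DuckDB dialect (a minimal sketch; output shapes may vary slightly across sqlglot versions):

import sqlglot

# GENERATE_SERIES(a, b) is end-inclusive while RANGE(a, b) is end-exclusive,
# so each call should survive a DuckDB -> DuckDB round-trip unchanged.
print(sqlglot.parse_one("SELECT GENERATE_SERIES(0, 5)", read="duckdb").sql("duckdb"))
print(sqlglot.parse_one("SELECT RANGE(0, 5)", read="duckdb").sql("duckdb"))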
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
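Both of the settings documented above are plain class attributes on the dialect, so the quickest way to check what DuckDB picked is to read them off an instance (a minimal sketch; the attribute names NULL_ORDERING and CONCAT_COALESCE are from sqlglot's Dialect base class, and their values depend on the installed version):

from sqlglot.dialects.dialect import Dialect

duckdb = Dialect.get_or_raise("duckdb")
# NULL ordering used when a query doesn't say NULLS FIRST/LAST explicitly.
print(duckdb.NULL_ORDERING)
# Whether a NULL argument to CONCAT yields NULL or an empty string.
print(duckdb.CONCAT_COALESCE)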
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
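For instance, the same x::INT[5] text should parse as a single fixed-size-array cast under DuckDB but as a cast followed by a subscript under Snowflake (a sketch; the printed SQL is the expected shape rather than a verified output):

import sqlglot

# DuckDB: INT[5] is a fixed-size array type, so the whole thing is one cast.
print(sqlglot.parse_one("x::INT[5]", read="duckdb").sql("duckdb"))
# Snowflake: no fixed-size arrays, so [5] becomes an index applied to CAST(x AS INT).
print(sqlglot.parse_one("x::INT[5]", read="snowflake").sql("snowflake"))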
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
288 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 289 if isinstance(path, exp.Literal): 290 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 291 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 292 # This check ensures we'll avoid trying to parse these as JSON paths, which can 293 # either result in a noisy warning or in an invalid representation of the path. 294 path_text = path.name 295 if path_text.startswith("/") or "[#" in path_text: 296 return path 297 298 return super().to_json_path(path)
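The effect of to_json_path is easiest to see with a pointer-style path, which should be passed through untouched rather than parsed as a JSONPath (a minimal sketch):

import sqlglot

# The leading "/" marks DuckDB's JSON pointer syntax, so the literal is kept
# as-is instead of being converted into a JSONPath expression.
expr = sqlglot.parse_one("SELECT j -> '/store/book' FROM t", read="duckdb")
print(expr.sql("duckdb"))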
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
300 class Tokenizer(tokens.Tokenizer): 301 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 302 HEREDOC_STRINGS = ["$"] 303 304 HEREDOC_TAG_IS_IDENTIFIER = True 305 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 306 307 KEYWORDS = { 308 **tokens.Tokenizer.KEYWORDS, 309 "//": TokenType.DIV, 310 "**": TokenType.DSTAR, 311 "^@": TokenType.CARET_AT, 312 "@>": TokenType.AT_GT, 313 "<@": TokenType.LT_AT, 314 "ATTACH": TokenType.ATTACH, 315 "BINARY": TokenType.VARBINARY, 316 "BITSTRING": TokenType.BIT, 317 "BPCHAR": TokenType.TEXT, 318 "CHAR": TokenType.TEXT, 319 "CHARACTER VARYING": TokenType.TEXT, 320 "DETACH": TokenType.DETACH, 321 "EXCLUDE": TokenType.EXCEPT, 322 "LOGICAL": TokenType.BOOLEAN, 323 "ONLY": TokenType.ONLY, 324 "PIVOT_WIDER": TokenType.PIVOT, 325 "POSITIONAL": TokenType.POSITIONAL, 326 "SIGNED": TokenType.INT, 327 "STRING": TokenType.TEXT, 328 "SUMMARIZE": TokenType.SUMMARIZE, 329 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 330 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 331 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 332 "TIMESTAMP_US": TokenType.TIMESTAMP, 333 "UBIGINT": TokenType.UBIGINT, 334 "UINTEGER": TokenType.UINT, 335 "USMALLINT": TokenType.USMALLINT, 336 "UTINYINT": TokenType.UTINYINT, 337 "VARCHAR": TokenType.TEXT, 338 } 339 KEYWORDS.pop("/*+") 340 341 SINGLE_TOKENS = { 342 **tokens.Tokenizer.SINGLE_TOKENS, 343 "$": TokenType.PARAMETER, 344 }
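A short way to see the keyword mappings above in action is to tokenize directly through the dialect (a minimal sketch; token type names depend on the sqlglot version):

from sqlglot.dialects.duckdb import DuckDB

# Per KEYWORDS above, "//" should come back as a DIV token and "**" as DSTAR.
for token in DuckDB().tokenize("SELECT 7 // 2, 2 ** 8"):
    print(token.token_type, repr(token.text))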
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
346 class Parser(parser.Parser): 347 BITWISE = { 348 **parser.Parser.BITWISE, 349 TokenType.TILDA: exp.RegexpLike, 350 } 351 BITWISE.pop(TokenType.CARET) 352 353 RANGE_PARSERS = { 354 **parser.Parser.RANGE_PARSERS, 355 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 356 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 357 } 358 359 EXPONENT = { 360 **parser.Parser.EXPONENT, 361 TokenType.CARET: exp.Pow, 362 TokenType.DSTAR: exp.Pow, 363 } 364 365 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 366 367 FUNCTIONS = { 368 **parser.Parser.FUNCTIONS, 369 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 370 "ARRAY_SORT": exp.SortArray.from_arg_list, 371 "DATEDIFF": _build_date_diff, 372 "DATE_DIFF": _build_date_diff, 373 "DATE_TRUNC": date_trunc_to_time, 374 "DATETRUNC": date_trunc_to_time, 375 "DECODE": lambda args: exp.Decode( 376 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 377 ), 378 "ENCODE": lambda args: exp.Encode( 379 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 380 ), 381 "EPOCH": exp.TimeToUnix.from_arg_list, 382 "EPOCH_MS": lambda args: exp.UnixToTime( 383 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 384 ), 385 "JSON": exp.ParseJSON.from_arg_list, 386 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 387 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 388 "LIST_HAS": exp.ArrayContains.from_arg_list, 389 "LIST_REVERSE_SORT": _build_sort_array_desc, 390 "LIST_SORT": exp.SortArray.from_arg_list, 391 "LIST_VALUE": lambda args: exp.Array(expressions=args), 392 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 393 "MAKE_TIMESTAMP": _build_make_timestamp, 394 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 395 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 396 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 397 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 398 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 399 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 400 this=seq_get(args, 0), 401 expression=seq_get(args, 1), 402 replacement=seq_get(args, 2), 403 modifiers=seq_get(args, 3), 404 ), 405 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 406 "STRING_SPLIT": exp.Split.from_arg_list, 407 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 408 "STRING_TO_ARRAY": exp.Split.from_arg_list, 409 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 410 "STRUCT_PACK": exp.Struct.from_arg_list, 411 "STR_SPLIT": exp.Split.from_arg_list, 412 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 413 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 414 "UNNEST": exp.Explode.from_arg_list, 415 "XOR": binary_from_function(exp.BitwiseXor), 416 "GENERATE_SERIES": _build_generate_series(), 417 "RANGE": _build_generate_series(end_exclusive=True), 418 "EDITDIST3": exp.Levenshtein.from_arg_list, 419 } 420 421 FUNCTIONS.pop("DATE_SUB") 422 FUNCTIONS.pop("GLOB") 423 424 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 425 FUNCTION_PARSERS.pop("DECODE") 426 427 NO_PAREN_FUNCTION_PARSERS = { 428 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 429 "MAP": lambda self: self._parse_map(), 430 } 431 432 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 433 TokenType.SEMI, 434 TokenType.ANTI, 435 } 436 437 PLACEHOLDER_PARSERS = { 438 **parser.Parser.PLACEHOLDER_PARSERS, 439 TokenType.PARAMETER: lambda self: ( 440 self.expression(exp.Placeholder, this=self._prev.text) 441 if self._match(TokenType.NUMBER) or 
self._match_set(self.ID_VAR_TOKENS) 442 else None 443 ), 444 } 445 446 TYPE_CONVERTERS = { 447 # https://duckdb.org/docs/sql/data_types/numeric 448 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 449 # https://duckdb.org/docs/sql/data_types/text 450 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 451 } 452 453 STATEMENT_PARSERS = { 454 **parser.Parser.STATEMENT_PARSERS, 455 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 456 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 457 } 458 459 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 460 # https://duckdb.org/docs/sql/samples.html 461 sample = super()._parse_table_sample(as_modifier=as_modifier) 462 if sample and not sample.args.get("method"): 463 if sample.args.get("size"): 464 sample.set("method", exp.var("RESERVOIR")) 465 else: 466 sample.set("method", exp.var("SYSTEM")) 467 468 return sample 469 470 def _parse_bracket( 471 self, this: t.Optional[exp.Expression] = None 472 ) -> t.Optional[exp.Expression]: 473 bracket = super()._parse_bracket(this) 474 if isinstance(bracket, exp.Bracket): 475 bracket.set("returns_list_for_maps", True) 476 477 return bracket 478 479 def _parse_map(self) -> exp.ToMap | exp.Map: 480 if self._match(TokenType.L_BRACE, advance=False): 481 return self.expression(exp.ToMap, this=self._parse_bracket()) 482 483 args = self._parse_wrapped_csv(self._parse_assignment) 484 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 485 486 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 487 return self._parse_field_def() 488 489 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 490 if len(aggregations) == 1: 491 return super()._pivot_column_names(aggregations) 492 return pivot_column_names(aggregations, dialect="duckdb") 493 494 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 495 def _parse_attach_option() -> exp.AttachOption: 496 return self.expression( 497 exp.AttachOption, 498 this=self._parse_var(any_token=True), 499 expression=self._parse_field(any_token=True), 500 ) 501 502 self._match(TokenType.DATABASE) 503 exists = self._parse_exists(not_=is_attach) 504 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 505 506 if self._match(TokenType.L_PAREN, advance=False): 507 expressions = self._parse_wrapped_csv(_parse_attach_option) 508 else: 509 expressions = None 510 511 return ( 512 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 513 if is_attach 514 else self.expression(exp.Detach, this=this, exists=exists) 515 )
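Two DuckDB-specific constructs handled above, brace-style MAP literals (via _parse_map) and ATTACH statements (via _parse_attach_detach), can be exercised directly (a sketch; the exact reprs vary by version):

import sqlglot

# MAP followed by "{" routes through _parse_map and yields an exp.ToMap node.
print(repr(sqlglot.parse_one("SELECT MAP {'k': 1}", read="duckdb")))

# ATTACH parses into an exp.Attach node via the statement parser registered above.
print(repr(sqlglot.parse_one("ATTACH 'file.db' AS db1", read="duckdb")))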
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
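A sketch of the first two arguments (ErrorLevel and ParseError live in sqlglot.errors; the truncated query is just an example of invalid input):

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # With RAISE, parse errors are collected and raised as a single ParseError
    # containing at most max_errors messages.
    sqlglot.parse_one("SELECT * FROM", error_level=ErrorLevel.RAISE, max_errors=1)
except ParseError as err:
    print(err)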
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
517 class Generator(generator.Generator): 518 PARAMETER_TOKEN = "$" 519 NAMED_PLACEHOLDER_TOKEN = "$" 520 JOIN_HINTS = False 521 TABLE_HINTS = False 522 QUERY_HINTS = False 523 LIMIT_FETCH = "LIMIT" 524 STRUCT_DELIMITER = ("(", ")") 525 RENAME_TABLE_WITH_DB = False 526 NVL2_SUPPORTED = False 527 SEMI_ANTI_JOIN_WITH_SIDE = False 528 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 529 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 530 LAST_DAY_SUPPORTS_DATE_PART = False 531 JSON_KEY_VALUE_PAIR_SEP = "," 532 IGNORE_NULLS_IN_FUNC = True 533 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 534 SUPPORTS_CREATE_TABLE_LIKE = False 535 MULTI_ARG_DISTINCT = False 536 CAN_IMPLEMENT_ARRAY_ANY = True 537 SUPPORTS_TO_NUMBER = False 538 COPY_HAS_INTO_KEYWORD = False 539 STAR_EXCEPT = "EXCLUDE" 540 PAD_FILL_PATTERN_IS_REQUIRED = True 541 ARRAY_CONCAT_IS_VAR_LEN = False 542 ARRAY_SIZE_DIM_REQUIRED = False 543 544 TRANSFORMS = { 545 **generator.Generator.TRANSFORMS, 546 exp.ApproxDistinct: approx_count_distinct_sql, 547 exp.Array: inline_array_unless_query, 548 exp.ArrayFilter: rename_func("LIST_FILTER"), 549 exp.ArraySort: _array_sort_sql, 550 exp.ArraySum: rename_func("LIST_SUM"), 551 exp.BitwiseXor: rename_func("XOR"), 552 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 553 exp.CurrentDate: lambda *_: "CURRENT_DATE", 554 exp.CurrentTime: lambda *_: "CURRENT_TIME", 555 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 556 exp.DayOfMonth: rename_func("DAYOFMONTH"), 557 exp.DayOfWeek: rename_func("DAYOFWEEK"), 558 exp.DayOfWeekIso: rename_func("ISODOW"), 559 exp.DayOfYear: rename_func("DAYOFYEAR"), 560 exp.DataType: _datatype_sql, 561 exp.Date: _date_sql, 562 exp.DateAdd: _date_delta_sql, 563 exp.DateFromParts: rename_func("MAKE_DATE"), 564 exp.DateSub: _date_delta_sql, 565 exp.DateDiff: _date_diff_sql, 566 exp.DateStrToDate: datestrtodate_sql, 567 exp.Datetime: no_datetime_sql, 568 exp.DatetimeSub: _date_delta_sql, 569 exp.DatetimeAdd: _date_delta_sql, 570 exp.DateToDi: lambda self, 571 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 572 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 573 exp.DiToDate: lambda self, 574 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 575 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 576 exp.GenerateDateArray: _generate_datetime_array_sql, 577 exp.GenerateTimestampArray: _generate_datetime_array_sql, 578 exp.Explode: rename_func("UNNEST"), 579 exp.IntDiv: lambda self, e: self.binary(e, "//"), 580 exp.IsInf: rename_func("ISINF"), 581 exp.IsNan: rename_func("ISNAN"), 582 exp.JSONBExists: rename_func("JSON_EXISTS"), 583 exp.JSONExtract: _arrow_json_extract_sql, 584 exp.JSONExtractArray: _json_extract_value_array_sql, 585 exp.JSONExtractScalar: _arrow_json_extract_sql, 586 exp.JSONFormat: _json_format_sql, 587 exp.JSONValueArray: _json_extract_value_array_sql, 588 exp.Lateral: explode_to_unnest_sql, 589 exp.LogicalOr: rename_func("BOOL_OR"), 590 exp.LogicalAnd: rename_func("BOOL_AND"), 591 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 592 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 593 exp.MonthsBetween: lambda self, e: self.func( 594 "DATEDIFF", 595 "'month'", 596 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 597 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 598 ), 599 exp.PercentileCont: rename_func("QUANTILE_CONT"), 600 exp.PercentileDisc: 
rename_func("QUANTILE_DISC"), 601 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 602 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 603 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 604 exp.RegexpReplace: lambda self, e: self.func( 605 "REGEXP_REPLACE", 606 e.this, 607 e.expression, 608 e.args.get("replacement"), 609 e.args.get("modifiers"), 610 ), 611 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 612 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 613 exp.Return: lambda self, e: self.sql(e, "this"), 614 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 615 exp.Rand: rename_func("RANDOM"), 616 exp.SafeDivide: no_safe_divide_sql, 617 exp.SHA: rename_func("SHA1"), 618 exp.SHA2: sha256_sql, 619 exp.Split: rename_func("STR_SPLIT"), 620 exp.SortArray: _sort_array_sql, 621 exp.StrPosition: str_position_sql, 622 exp.StrToUnix: lambda self, e: self.func( 623 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 624 ), 625 exp.Struct: _struct_sql, 626 exp.Transform: rename_func("LIST_TRANSFORM"), 627 exp.TimeAdd: _date_delta_sql, 628 exp.Time: no_time_sql, 629 exp.TimeDiff: _timediff_sql, 630 exp.Timestamp: no_timestamp_sql, 631 exp.TimestampDiff: lambda self, e: self.func( 632 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 633 ), 634 exp.TimestampTrunc: timestamptrunc_sql(), 635 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 636 exp.TimeStrToTime: timestrtotime_sql, 637 exp.TimeStrToUnix: lambda self, e: self.func( 638 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 639 ), 640 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 641 exp.TimeToUnix: rename_func("EPOCH"), 642 exp.TsOrDiToDi: lambda self, 643 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 644 exp.TsOrDsAdd: _date_delta_sql, 645 exp.TsOrDsDiff: lambda self, e: self.func( 646 "DATE_DIFF", 647 f"'{e.args.get('unit') or 'DAY'}'", 648 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 649 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 650 ), 651 exp.UnixToStr: lambda self, e: self.func( 652 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 653 ), 654 exp.DatetimeTrunc: lambda self, e: self.func( 655 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 656 ), 657 exp.UnixToTime: _unix_to_time_sql, 658 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 659 exp.VariancePop: rename_func("VAR_POP"), 660 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 661 exp.Xor: bool_xor_sql, 662 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 663 rename_func("LEVENSHTEIN") 664 ), 665 } 666 667 SUPPORTED_JSON_PATH_PARTS = { 668 exp.JSONPathKey, 669 exp.JSONPathRoot, 670 exp.JSONPathSubscript, 671 exp.JSONPathWildcard, 672 } 673 674 TYPE_MAPPING = { 675 **generator.Generator.TYPE_MAPPING, 676 exp.DataType.Type.BINARY: "BLOB", 677 exp.DataType.Type.BPCHAR: "TEXT", 678 exp.DataType.Type.CHAR: "TEXT", 679 exp.DataType.Type.DATETIME: "TIMESTAMP", 680 exp.DataType.Type.FLOAT: "REAL", 681 exp.DataType.Type.NCHAR: "TEXT", 682 exp.DataType.Type.NVARCHAR: "TEXT", 683 exp.DataType.Type.UINT: "UINTEGER", 684 exp.DataType.Type.VARBINARY: "BLOB", 685 exp.DataType.Type.ROWVERSION: "BLOB", 686 exp.DataType.Type.VARCHAR: "TEXT", 687 
exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 688 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 689 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 690 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 691 } 692 693 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 694 RESERVED_KEYWORDS = { 695 "array", 696 "analyse", 697 "union", 698 "all", 699 "when", 700 "in_p", 701 "default", 702 "create_p", 703 "window", 704 "asymmetric", 705 "to", 706 "else", 707 "localtime", 708 "from", 709 "end_p", 710 "select", 711 "current_date", 712 "foreign", 713 "with", 714 "grant", 715 "session_user", 716 "or", 717 "except", 718 "references", 719 "fetch", 720 "limit", 721 "group_p", 722 "leading", 723 "into", 724 "collate", 725 "offset", 726 "do", 727 "then", 728 "localtimestamp", 729 "check_p", 730 "lateral_p", 731 "current_role", 732 "where", 733 "asc_p", 734 "placing", 735 "desc_p", 736 "user", 737 "unique", 738 "initially", 739 "column", 740 "both", 741 "some", 742 "as", 743 "any", 744 "only", 745 "deferrable", 746 "null_p", 747 "current_time", 748 "true_p", 749 "table", 750 "case", 751 "trailing", 752 "variadic", 753 "for", 754 "on", 755 "distinct", 756 "false_p", 757 "not", 758 "constraint", 759 "current_timestamp", 760 "returning", 761 "primary", 762 "intersect", 763 "having", 764 "analyze", 765 "current_user", 766 "and", 767 "cast", 768 "symmetric", 769 "using", 770 "order", 771 "current_catalog", 772 } 773 774 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 775 776 # DuckDB doesn't generally support CREATE TABLE .. properties 777 # https://duckdb.org/docs/sql/statements/create_table.html 778 PROPERTIES_LOCATION = { 779 prop: exp.Properties.Location.UNSUPPORTED 780 for prop in generator.Generator.PROPERTIES_LOCATION 781 } 782 783 # There are a few exceptions (e.g. 
temporary tables) which are supported or 784 # can be transpiled to DuckDB, so we explicitly override them accordingly 785 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 786 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 787 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 788 789 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 790 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 791 792 def strtotime_sql(self, expression: exp.StrToTime) -> str: 793 if expression.args.get("safe"): 794 formatted_time = self.format_time(expression) 795 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 796 return str_to_time_sql(self, expression) 797 798 def strtodate_sql(self, expression: exp.StrToDate) -> str: 799 if expression.args.get("safe"): 800 formatted_time = self.format_time(expression) 801 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 802 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 803 804 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 805 arg = expression.this 806 if expression.args.get("safe"): 807 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 808 return self.func("JSON", arg) 809 810 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 811 nano = expression.args.get("nano") 812 if nano is not None: 813 expression.set( 814 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 815 ) 816 817 return rename_func("MAKE_TIME")(self, expression) 818 819 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 820 sec = expression.args["sec"] 821 822 milli = expression.args.get("milli") 823 if milli is not None: 824 sec += milli.pop() / exp.Literal.number(1000.0) 825 826 nano = expression.args.get("nano") 827 if nano is not None: 828 sec += nano.pop() / exp.Literal.number(1000000000.0) 829 830 if milli or nano: 831 expression.set("sec", sec) 832 833 return rename_func("MAKE_TIMESTAMP")(self, expression) 834 835 def tablesample_sql( 836 self, 837 expression: exp.TableSample, 838 tablesample_keyword: t.Optional[str] = None, 839 ) -> str: 840 if not isinstance(expression.parent, exp.Select): 841 # This sample clause only applies to a single source, not the entire resulting relation 842 tablesample_keyword = "TABLESAMPLE" 843 844 if expression.args.get("size"): 845 method = expression.args.get("method") 846 if method and method.name.upper() != "RESERVOIR": 847 self.unsupported( 848 f"Sampling method {method} is not supported with a discrete sample count, " 849 "defaulting to reservoir sampling" 850 ) 851 expression.set("method", exp.var("RESERVOIR")) 852 853 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 854 855 def interval_sql(self, expression: exp.Interval) -> str: 856 multiplier: t.Optional[int] = None 857 unit = expression.text("unit").lower() 858 859 if unit.startswith("week"): 860 multiplier = 7 861 if unit.startswith("quarter"): 862 multiplier = 90 863 864 if multiplier: 865 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 866 867 return super().interval_sql(expression) 868 869 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 870 if isinstance(expression.parent, exp.UserDefinedFunction): 871 return self.sql(expression, "this") 872 
return super().columndef_sql(expression, sep) 873 874 def join_sql(self, expression: exp.Join) -> str: 875 if ( 876 expression.side == "LEFT" 877 and not expression.args.get("on") 878 and isinstance(expression.this, exp.Unnest) 879 ): 880 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 881 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 882 return super().join_sql(expression.on(exp.true())) 883 884 return super().join_sql(expression) 885 886 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 887 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 888 if expression.args.get("is_end_exclusive"): 889 return rename_func("RANGE")(self, expression) 890 891 return self.function_fallback_sql(expression) 892 893 def bracket_sql(self, expression: exp.Bracket) -> str: 894 this = expression.this 895 if isinstance(this, exp.Array): 896 this.replace(exp.paren(this)) 897 898 bracket = super().bracket_sql(expression) 899 900 if not expression.args.get("returns_list_for_maps"): 901 if not this.type: 902 from sqlglot.optimizer.annotate_types import annotate_types 903 904 this = annotate_types(this) 905 906 if this.is_type(exp.DataType.Type.MAP): 907 bracket = f"({bracket})[1]" 908 909 return bracket 910 911 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 912 expression_sql = self.sql(expression, "expression") 913 914 func = expression.this 915 if isinstance(func, exp.PERCENTILES): 916 # Make the order key the first arg and slide the fraction to the right 917 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 918 order_col = expression.find(exp.Ordered) 919 if order_col: 920 func.set("expression", func.this) 921 func.set("this", order_col.this) 922 923 this = self.sql(expression, "this").rstrip(")") 924 925 return f"{this}{expression_sql})" 926 927 def length_sql(self, expression: exp.Length) -> str: 928 arg = expression.this 929 930 # Dialects like BQ and Snowflake also accept binary values as args, so 931 # DDB will attempt to infer the type or resort to case/when resolution 932 if not expression.args.get("binary") or arg.is_string: 933 return self.func("LENGTH", arg) 934 935 if not arg.type: 936 from sqlglot.optimizer.annotate_types import annotate_types 937 938 arg = annotate_types(arg) 939 940 if arg.is_type(*exp.DataType.TEXT_TYPES): 941 return self.func("LENGTH", arg) 942 943 # We need these casts to make duckdb's static type checker happy 944 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 945 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 946 947 case = ( 948 exp.case(self.func("TYPEOF", arg)) 949 .when( 950 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 951 ) # anonymous to break length_sql recursion 952 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 953 ) 954 955 return self.sql(case) 956 957 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 958 this = expression.this 959 key = expression.args.get("key") 960 key_sql = key.name if isinstance(key, exp.Expression) else "" 961 value_sql = self.sql(expression, "value") 962 963 kv_sql = f"{key_sql} := {value_sql}" 964 965 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 966 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 967 if isinstance(this, exp.Struct) and not this.expressions: 968 return self.func("STRUCT_PACK", kv_sql) 969 970 return self.func("STRUCT_INSERT", this, kv_sql) 971 972 def unnest_sql(self, expression: exp.Unnest) -> str: 973 explode_array = expression.args.get("explode_array") 974 if explode_array: 975 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 976 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 977 expression.expressions.append( 978 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 979 ) 980 981 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 982 alias = expression.args.get("alias") 983 if alias: 984 expression.set("alias", None) 985 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 986 987 unnest_sql = super().unnest_sql(expression) 988 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 989 return self.sql(select) 990 991 return super().unnest_sql(expression) 992 993 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 994 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 995 # DuckDB should render IGNORE NULLS only for the general-purpose 996 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 997 return super().ignorenulls_sql(expression) 998 999 return self.sql(expression, "this") 1000 1001 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1002 this = self.sql(expression, "this") 1003 null_text = self.sql(expression, "null") 1004 1005 if null_text: 1006 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1007 1008 return self.func("ARRAY_TO_STRING", this, expression.expression) 1009 1010 @unsupported_args("position", "occurrence") 1011 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1012 group = expression.args.get("group") 1013 params = expression.args.get("parameters") 1014 1015 # Do not render group if there is no following argument, 1016 # and it's the default value for this dialect 1017 if ( 1018 not params 1019 and group 1020 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1021 ): 1022 group = None 1023 return self.func( 1024 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1025 ) 1026 1027 @unsupported_args("culture") 1028 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1029 fmt = expression.args.get("format") 1030 if fmt and fmt.is_int: 1031 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1032 1033 self.unsupported("Only integer formats are supported by NumberToStr") 1034 return self.function_fallback_sql(expression)
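A few of the settings and TRANSFORMS above are simplest to verify end to end by transpiling into DuckDB (a sketch; outputs may differ slightly across sqlglot versions):

import sqlglot

# STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT is rewritten accordingly.
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])

# rename_func at work: exp.Rand becomes RANDOM and exp.Split becomes STR_SPLIT.
print(sqlglot.transpile("SELECT RAND(), SPLIT('a,b', ',')", read="bigquery", write="duckdb")[0])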
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
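These options are passed as keyword arguments at generation time, whether through sqlglot.transpile or Expression.sql (a minimal sketch):

import sqlglot

ast = sqlglot.parse_one("select a, b from t where a > 1")

# pretty=True applies the pad/indent sizes documented above;
# identify=True quotes every identifier.
print(ast.sql(dialect="duckdb", pretty=True))
print(ast.sql(dialect="duckdb", identify=True))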
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql