sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms

from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            if is_property_eq:
                if isinstance(expr.this, exp.Identifier):
                    key = self.sql(exp.Literal.string(expr.name))
                else:
                    key = self.sql(expr.this)
            else:
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

        def _parse_primary(self) -> t.Optional[exp.Expression]:
            if self._match_pair(TokenType.HASH, TokenType.NUMBER):
                return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

            return super()._parse_primary()

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True
        SUPPORTS_LIKE_QUANTIFIERS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql
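
Usage sketch (not part of the module source): the dialect above is exercised through sqlglot's public API, e.g. parse_one and transpile. The SQL strings are illustrative and the exact rendered output may vary slightly across sqlglot versions.

import sqlglot

# Round-trip DuckDB-specific syntax: EPOCH_MS is parsed into exp.UnixToTime
# (scale=MILLIS) via the Parser.FUNCTIONS table and rendered back by
# _unix_to_time_sql.
print(sqlglot.parse_one("SELECT EPOCH_MS(1618088028295)", read="duckdb").sql(dialect="duckdb"))

# Cross-dialect transpilation: BigQuery's STRUCT syntax is canonicalized by
# _struct_sql into a DuckDB struct literal.
print(sqlglot.transpile("SELECT STRUCT(1 AS x, 'a' AS y)", read="bigquery", write="duckdb")[0])

# RANGE is parsed with is_end_exclusive=True, so generateseries_sql regenerates
# it as end-exclusive RANGE rather than end-inclusive GENERATE_SERIES.
print(sqlglot.transpile("SELECT RANGE(0, 5)", read="duckdb", write="duckdb")[0])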
**generator.Generator.TYPE_MAPPING, 811 exp.DataType.Type.BINARY: "BLOB", 812 exp.DataType.Type.BPCHAR: "TEXT", 813 exp.DataType.Type.CHAR: "TEXT", 814 exp.DataType.Type.DATETIME: "TIMESTAMP", 815 exp.DataType.Type.FLOAT: "REAL", 816 exp.DataType.Type.JSONB: "JSON", 817 exp.DataType.Type.NCHAR: "TEXT", 818 exp.DataType.Type.NVARCHAR: "TEXT", 819 exp.DataType.Type.UINT: "UINTEGER", 820 exp.DataType.Type.VARBINARY: "BLOB", 821 exp.DataType.Type.ROWVERSION: "BLOB", 822 exp.DataType.Type.VARCHAR: "TEXT", 823 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 824 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 825 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 826 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 827 } 828 829 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 830 RESERVED_KEYWORDS = { 831 "array", 832 "analyse", 833 "union", 834 "all", 835 "when", 836 "in_p", 837 "default", 838 "create_p", 839 "window", 840 "asymmetric", 841 "to", 842 "else", 843 "localtime", 844 "from", 845 "end_p", 846 "select", 847 "current_date", 848 "foreign", 849 "with", 850 "grant", 851 "session_user", 852 "or", 853 "except", 854 "references", 855 "fetch", 856 "limit", 857 "group_p", 858 "leading", 859 "into", 860 "collate", 861 "offset", 862 "do", 863 "then", 864 "localtimestamp", 865 "check_p", 866 "lateral_p", 867 "current_role", 868 "where", 869 "asc_p", 870 "placing", 871 "desc_p", 872 "user", 873 "unique", 874 "initially", 875 "column", 876 "both", 877 "some", 878 "as", 879 "any", 880 "only", 881 "deferrable", 882 "null_p", 883 "current_time", 884 "true_p", 885 "table", 886 "case", 887 "trailing", 888 "variadic", 889 "for", 890 "on", 891 "distinct", 892 "false_p", 893 "not", 894 "constraint", 895 "current_timestamp", 896 "returning", 897 "primary", 898 "intersect", 899 "having", 900 "analyze", 901 "current_user", 902 "and", 903 "cast", 904 "symmetric", 905 "using", 906 "order", 907 "current_catalog", 908 } 909 910 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 911 912 # DuckDB doesn't generally support CREATE TABLE .. properties 913 # https://duckdb.org/docs/sql/statements/create_table.html 914 PROPERTIES_LOCATION = { 915 prop: exp.Properties.Location.UNSUPPORTED 916 for prop in generator.Generator.PROPERTIES_LOCATION 917 } 918 919 # There are a few exceptions (e.g. 
temporary tables) which are supported or 920 # can be transpiled to DuckDB, so we explicitly override them accordingly 921 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 922 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 923 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 924 925 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 926 exp.FirstValue, 927 exp.Lag, 928 exp.LastValue, 929 exp.Lead, 930 exp.NthValue, 931 ) 932 933 def lambda_sql( 934 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 935 ) -> str: 936 if expression.args.get("colon"): 937 prefix = "LAMBDA " 938 arrow_sep = ":" 939 wrap = False 940 else: 941 prefix = "" 942 943 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 944 return f"{prefix}{lambda_sql}" 945 946 def show_sql(self, expression: exp.Show) -> str: 947 return f"SHOW {expression.name}" 948 949 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 950 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 951 952 def strtotime_sql(self, expression: exp.StrToTime) -> str: 953 if expression.args.get("safe"): 954 formatted_time = self.format_time(expression) 955 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 956 return str_to_time_sql(self, expression) 957 958 def strtodate_sql(self, expression: exp.StrToDate) -> str: 959 if expression.args.get("safe"): 960 formatted_time = self.format_time(expression) 961 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 962 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 963 964 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 965 arg = expression.this 966 if expression.args.get("safe"): 967 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 968 return self.func("JSON", arg) 969 970 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 971 nano = expression.args.get("nano") 972 if nano is not None: 973 expression.set( 974 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 975 ) 976 977 return rename_func("MAKE_TIME")(self, expression) 978 979 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 980 sec = expression.args["sec"] 981 982 milli = expression.args.get("milli") 983 if milli is not None: 984 sec += milli.pop() / exp.Literal.number(1000.0) 985 986 nano = expression.args.get("nano") 987 if nano is not None: 988 sec += nano.pop() / exp.Literal.number(1000000000.0) 989 990 if milli or nano: 991 expression.set("sec", sec) 992 993 return rename_func("MAKE_TIMESTAMP")(self, expression) 994 995 def tablesample_sql( 996 self, 997 expression: exp.TableSample, 998 tablesample_keyword: t.Optional[str] = None, 999 ) -> str: 1000 if not isinstance(expression.parent, exp.Select): 1001 # This sample clause only applies to a single source, not the entire resulting relation 1002 tablesample_keyword = "TABLESAMPLE" 1003 1004 if expression.args.get("size"): 1005 method = expression.args.get("method") 1006 if method and method.name.upper() != "RESERVOIR": 1007 self.unsupported( 1008 f"Sampling method {method} is not supported with a discrete sample count, " 1009 "defaulting to reservoir sampling" 1010 ) 1011 expression.set("method", exp.var("RESERVOIR")) 1012 1013 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 1014 1015 def 
columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 1016 if isinstance(expression.parent, exp.UserDefinedFunction): 1017 return self.sql(expression, "this") 1018 return super().columndef_sql(expression, sep) 1019 1020 def join_sql(self, expression: exp.Join) -> str: 1021 if ( 1022 expression.side == "LEFT" 1023 and not expression.args.get("on") 1024 and isinstance(expression.this, exp.Unnest) 1025 ): 1026 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1027 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1028 return super().join_sql(expression.on(exp.true())) 1029 1030 return super().join_sql(expression) 1031 1032 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1033 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1034 if expression.args.get("is_end_exclusive"): 1035 return rename_func("RANGE")(self, expression) 1036 1037 return self.function_fallback_sql(expression) 1038 1039 def countif_sql(self, expression: exp.CountIf) -> str: 1040 if self.dialect.version >= Version("1.2"): 1041 return self.function_fallback_sql(expression) 1042 1043 # https://github.com/tobymao/sqlglot/pull/4749 1044 return count_if_to_sum(self, expression) 1045 1046 def bracket_sql(self, expression: exp.Bracket) -> str: 1047 if self.dialect.version >= Version("1.2"): 1048 return super().bracket_sql(expression) 1049 1050 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1051 this = expression.this 1052 if isinstance(this, exp.Array): 1053 this.replace(exp.paren(this)) 1054 1055 bracket = super().bracket_sql(expression) 1056 1057 if not expression.args.get("returns_list_for_maps"): 1058 if not this.type: 1059 from sqlglot.optimizer.annotate_types import annotate_types 1060 1061 this = annotate_types(this, dialect=self.dialect) 1062 1063 if this.is_type(exp.DataType.Type.MAP): 1064 bracket = f"({bracket})[1]" 1065 1066 return bracket 1067 1068 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1069 expression_sql = self.sql(expression, "expression") 1070 1071 func = expression.this 1072 if isinstance(func, exp.PERCENTILES): 1073 # Make the order key the first arg and slide the fraction to the right 1074 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1075 order_col = expression.find(exp.Ordered) 1076 if order_col: 1077 func.set("expression", func.this) 1078 func.set("this", order_col.this) 1079 1080 this = self.sql(expression, "this").rstrip(")") 1081 1082 return f"{this}{expression_sql})" 1083 1084 def length_sql(self, expression: exp.Length) -> str: 1085 arg = expression.this 1086 1087 # Dialects like BQ and Snowflake also accept binary values as args, so 1088 # DDB will attempt to infer the type or resort to case/when resolution 1089 if not expression.args.get("binary") or arg.is_string: 1090 return self.func("LENGTH", arg) 1091 1092 if not arg.type: 1093 from sqlglot.optimizer.annotate_types import annotate_types 1094 1095 arg = annotate_types(arg, dialect=self.dialect) 1096 1097 if arg.is_type(*exp.DataType.TEXT_TYPES): 1098 return self.func("LENGTH", arg) 1099 1100 # We need these casts to make duckdb's static type checker happy 1101 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1102 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1103 1104 case = ( 1105 exp.case(self.func("TYPEOF", arg)) 1106 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1107 .else_( 1108 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1109 ) # anonymous to break 
length_sql recursion 1110 ) 1111 1112 return self.sql(case) 1113 1114 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1115 this = expression.this 1116 key = expression.args.get("key") 1117 key_sql = key.name if isinstance(key, exp.Expression) else "" 1118 value_sql = self.sql(expression, "value") 1119 1120 kv_sql = f"{key_sql} := {value_sql}" 1121 1122 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1123 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1124 if isinstance(this, exp.Struct) and not this.expressions: 1125 return self.func("STRUCT_PACK", kv_sql) 1126 1127 return self.func("STRUCT_INSERT", this, kv_sql) 1128 1129 def unnest_sql(self, expression: exp.Unnest) -> str: 1130 explode_array = expression.args.get("explode_array") 1131 if explode_array: 1132 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1133 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1134 expression.expressions.append( 1135 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1136 ) 1137 1138 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1139 alias = expression.args.get("alias") 1140 if alias: 1141 expression.set("alias", None) 1142 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1143 1144 unnest_sql = super().unnest_sql(expression) 1145 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1146 return self.sql(select) 1147 1148 return super().unnest_sql(expression) 1149 1150 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1151 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1152 # DuckDB should render IGNORE NULLS only for the general-purpose 1153 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1154 return super().ignorenulls_sql(expression) 1155 1156 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1157 return self.sql(expression, "this") 1158 1159 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1160 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1161 # DuckDB should render RESPECT NULLS only for the general-purpose 1162 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1163 return super().respectnulls_sql(expression) 1164 1165 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1166 return self.sql(expression, "this") 1167 1168 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1169 this = self.sql(expression, "this") 1170 null_text = self.sql(expression, "null") 1171 1172 if null_text: 1173 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1174 1175 return self.func("ARRAY_TO_STRING", this, expression.expression) 1176 1177 @unsupported_args("position", "occurrence") 1178 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1179 group = expression.args.get("group") 1180 params = expression.args.get("parameters") 1181 1182 # Do not render group if there is no following argument, 1183 # and it's the default value for this dialect 1184 if ( 1185 not params 1186 and group 1187 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1188 ): 1189 group = None 1190 return self.func( 1191 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1192 ) 1193 1194 @unsupported_args("culture") 1195 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1196 fmt = expression.args.get("format") 1197 if fmt and fmt.is_int: 1198 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1199 1200 self.unsupported("Only integer formats are supported by NumberToStr") 1201 return self.function_fallback_sql(expression) 1202 1203 def autoincrementcolumnconstraint_sql(self, _) -> str: 1204 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1205 return "" 1206 1207 def aliases_sql(self, expression: exp.Aliases) -> str: 1208 this = expression.this 1209 if isinstance(this, exp.Posexplode): 1210 return self.posexplode_sql(this) 1211 1212 return super().aliases_sql(expression) 1213 1214 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1215 this = expression.this 1216 parent = expression.parent 1217 1218 # The default Spark aliases are "pos" and "col", unless specified otherwise 1219 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1220 1221 if isinstance(parent, exp.Aliases): 1222 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1223 pos, col = parent.expressions 1224 elif isinstance(parent, exp.Table): 1225 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1226 alias = parent.args.get("alias") 1227 if alias: 1228 pos, col = alias.columns or [pos, col] 1229 alias.pop() 1230 1231 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1232 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1233 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1234 gen_subscripts = self.sql( 1235 exp.Alias( 1236 this=exp.Anonymous( 1237 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1238 ) 1239 - exp.Literal.number(1), 1240 alias=pos, 1241 ) 1242 ) 1243 1244 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1245 1246 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1247 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1248 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1249 1250 return posexplode_sql
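The tail of the listing above (posexplode_sql) is easiest to understand end to end: Spark's POSEXPLODE has no DuckDB builtin, so it is rewritten to GENERATE_SUBSCRIPTS(...) - 1 (DuckDB subscripts are 1-indexed, Spark positions are 0-indexed) paired with UNNEST. A minimal sketch using only the public sqlglot API; the output comment is approximate:

import sqlglot

# roughly: SELECT GENERATE_SUBSCRIPTS(arr, 1) - 1 AS pos, UNNEST(arr) AS col FROM t
print(sqlglot.transpile("SELECT POSEXPLODE(arr) FROM t", read="spark", write="duckdb")[0])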
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
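For example, DuckDB's CONCAT skips NULL arguments while operators like PostgreSQL's || propagate NULL, so sqlglot tracks the behavior per dialect when transpiling. A small sketch (output not pinned to a version):

import sqlglot

# Whether the target needs COALESCE-wrapped arguments to preserve semantics is
# decided by each dialect's CONCAT settings.
print(sqlglot.transpile("SELECT CONCAT(a, b) FROM t", read="duckdb", write="postgres")[0])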
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark 3/Databricks.
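Since DuckDB supports the shorthand natively, it round-trips unchanged; a one-line check:

import sqlglot

# ORDER BY ALL orders by every selected column, left to right.
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])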
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this is instead interpreted as a subscript/index operator.
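A sketch of the DuckDB side of that distinction (the cast target stays a fixed-size array type rather than becoming a subscript on the cast result):

import sqlglot

# INT[3] is parsed as part of the type, not as a [3] index on the cast result.
print(sqlglot.parse_one("SELECT CAST(x AS INT[3]) FROM t", read="duckdb").sql("duckdb"))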
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
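The separators are purely lexical, so the parsed literal is the plain number and targets without the syntax are unaffected:

import sqlglot

# roughly: SELECT 1000000
print(sqlglot.transpile("SELECT 1_000_000", read="duckdb", write="sqlite")[0])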
Specifies the strategy according to which identifiers should be normalized.
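DuckDB resolves identifiers case-insensitively, so its strategy normalizes them (typically to lowercase). A hedged sketch using the optimizer helper:

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Applies the dialect's normalization strategy to every identifier in the tree.
print(normalize_identifiers(parse_one('SELECT "Foo".Bar FROM "Foo"'), dialect="duckdb").sql("duckdb"))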
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    if isinstance(path, exp.Literal):
        # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
        # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
        # This check ensures we'll avoid trying to parse these as JSON paths, which can
        # either result in a noisy warning or in an invalid representation of the path.
        path_text = path.name
        if path_text.startswith("/") or "[#" in path_text:
            return path

    return super().to_json_path(path)
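In practice this means '$'-style paths become structured JSONPath nodes while JSON pointers survive untouched; a short sketch:

import sqlglot

# '/a/b' starts with '/' so to_json_path returns it as-is; '$.a.b' is parsed
# into structured JSONPath parts.
for path in ("$.a.b", "/a/b"):
    print(sqlglot.parse_one(f"SELECT j -> '{path}' FROM t", read="duckdb").sql("duckdb"))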
class Tokenizer(tokens.Tokenizer):
    BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "//": TokenType.DIV,
        "**": TokenType.DSTAR,
        "^@": TokenType.CARET_AT,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "ATTACH": TokenType.ATTACH,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "DATETIME": TokenType.TIMESTAMPNTZ,
        "DETACH": TokenType.DETACH,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "RESET": TokenType.COMMAND,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP": TokenType.TIMESTAMPNTZ,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
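A few of the remaps above, observed through a round trip (output comment is approximate):

import sqlglot

# STRING, CHAR and BPCHAR all tokenize as TEXT, and '//' is integer division.
print(sqlglot.transpile("SELECT CAST(x AS STRING), 7 // 2 FROM t", read="duckdb", write="duckdb")[0])
# roughly: SELECT CAST(x AS TEXT), 7 // 2 FROM t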
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
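These options are forwarded when parsing through the top-level API; a hedged sketch of collecting structured errors:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

# With ErrorLevel.RAISE, up to max_errors messages are bundled into one ParseError.
try:
    sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)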
641 class Generator(generator.Generator): 642 PARAMETER_TOKEN = "$" 643 NAMED_PLACEHOLDER_TOKEN = "$" 644 JOIN_HINTS = False 645 TABLE_HINTS = False 646 QUERY_HINTS = False 647 LIMIT_FETCH = "LIMIT" 648 STRUCT_DELIMITER = ("(", ")") 649 RENAME_TABLE_WITH_DB = False 650 NVL2_SUPPORTED = False 651 SEMI_ANTI_JOIN_WITH_SIDE = False 652 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 653 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 654 LAST_DAY_SUPPORTS_DATE_PART = False 655 JSON_KEY_VALUE_PAIR_SEP = "," 656 IGNORE_NULLS_IN_FUNC = True 657 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 658 SUPPORTS_CREATE_TABLE_LIKE = False 659 MULTI_ARG_DISTINCT = False 660 CAN_IMPLEMENT_ARRAY_ANY = True 661 SUPPORTS_TO_NUMBER = False 662 SUPPORTS_WINDOW_EXCLUDE = True 663 COPY_HAS_INTO_KEYWORD = False 664 STAR_EXCEPT = "EXCLUDE" 665 PAD_FILL_PATTERN_IS_REQUIRED = True 666 ARRAY_CONCAT_IS_VAR_LEN = False 667 ARRAY_SIZE_DIM_REQUIRED = False 668 NORMALIZE_EXTRACT_DATE_PARTS = True 669 SUPPORTS_LIKE_QUANTIFIERS = False 670 671 TRANSFORMS = { 672 **generator.Generator.TRANSFORMS, 673 exp.ApproxDistinct: approx_count_distinct_sql, 674 exp.Array: inline_array_unless_query, 675 exp.ArrayFilter: rename_func("LIST_FILTER"), 676 exp.ArrayRemove: remove_from_array_using_filter, 677 exp.ArraySort: _array_sort_sql, 678 exp.ArraySum: rename_func("LIST_SUM"), 679 exp.BitwiseXor: rename_func("XOR"), 680 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 681 exp.CurrentDate: lambda *_: "CURRENT_DATE", 682 exp.CurrentTime: lambda *_: "CURRENT_TIME", 683 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 684 exp.DayOfMonth: rename_func("DAYOFMONTH"), 685 exp.DayOfWeek: rename_func("DAYOFWEEK"), 686 exp.DayOfWeekIso: rename_func("ISODOW"), 687 exp.DayOfYear: rename_func("DAYOFYEAR"), 688 exp.DataType: _datatype_sql, 689 exp.Date: _date_sql, 690 exp.DateAdd: _date_delta_sql, 691 exp.DateFromParts: rename_func("MAKE_DATE"), 692 exp.DateSub: _date_delta_sql, 693 exp.DateDiff: _date_diff_sql, 694 exp.DateStrToDate: datestrtodate_sql, 695 exp.Datetime: no_datetime_sql, 696 exp.DatetimeSub: _date_delta_sql, 697 exp.DatetimeAdd: _date_delta_sql, 698 exp.DateToDi: lambda self, 699 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 700 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 701 exp.DiToDate: lambda self, 702 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 703 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 704 exp.GenerateDateArray: _generate_datetime_array_sql, 705 exp.GenerateTimestampArray: _generate_datetime_array_sql, 706 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 707 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 708 exp.Explode: rename_func("UNNEST"), 709 exp.IntDiv: lambda self, e: self.binary(e, "//"), 710 exp.IsInf: rename_func("ISINF"), 711 exp.IsNan: rename_func("ISNAN"), 712 exp.JSONBExists: rename_func("JSON_EXISTS"), 713 exp.JSONExtract: _arrow_json_extract_sql, 714 exp.JSONExtractArray: _json_extract_value_array_sql, 715 exp.JSONExtractScalar: _arrow_json_extract_sql, 716 exp.JSONFormat: _json_format_sql, 717 exp.JSONValueArray: _json_extract_value_array_sql, 718 exp.Lateral: explode_to_unnest_sql, 719 exp.LogicalOr: rename_func("BOOL_OR"), 720 exp.LogicalAnd: rename_func("BOOL_AND"), 721 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 722 exp.MD5Digest: lambda self, e: 
self.func("UNHEX", self.func("MD5", e.this)), 723 exp.MonthsBetween: lambda self, e: self.func( 724 "DATEDIFF", 725 "'month'", 726 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 727 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 728 ), 729 exp.PercentileCont: rename_func("QUANTILE_CONT"), 730 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 731 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 732 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 733 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 734 exp.RegexpReplace: lambda self, e: self.func( 735 "REGEXP_REPLACE", 736 e.this, 737 e.expression, 738 e.args.get("replacement"), 739 e.args.get("modifiers"), 740 ), 741 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 742 exp.RegexpILike: lambda self, e: self.func( 743 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 744 ), 745 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 746 exp.Return: lambda self, e: self.sql(e, "this"), 747 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 748 exp.Rand: rename_func("RANDOM"), 749 exp.SHA: rename_func("SHA1"), 750 exp.SHA2: sha256_sql, 751 exp.Split: rename_func("STR_SPLIT"), 752 exp.SortArray: _sort_array_sql, 753 exp.StrPosition: strposition_sql, 754 exp.StrToUnix: lambda self, e: self.func( 755 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 756 ), 757 exp.Struct: _struct_sql, 758 exp.Transform: rename_func("LIST_TRANSFORM"), 759 exp.TimeAdd: _date_delta_sql, 760 exp.Time: no_time_sql, 761 exp.TimeDiff: _timediff_sql, 762 exp.Timestamp: no_timestamp_sql, 763 exp.TimestampDiff: lambda self, e: self.func( 764 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 765 ), 766 exp.TimestampTrunc: timestamptrunc_sql(), 767 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 768 exp.TimeStrToTime: timestrtotime_sql, 769 exp.TimeStrToUnix: lambda self, e: self.func( 770 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 771 ), 772 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 773 exp.TimeToUnix: rename_func("EPOCH"), 774 exp.TsOrDiToDi: lambda self, 775 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 776 exp.TsOrDsAdd: _date_delta_sql, 777 exp.TsOrDsDiff: lambda self, e: self.func( 778 "DATE_DIFF", 779 f"'{e.args.get('unit') or 'DAY'}'", 780 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 781 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 782 ), 783 exp.UnixToStr: lambda self, e: self.func( 784 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 785 ), 786 exp.DatetimeTrunc: lambda self, e: self.func( 787 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 788 ), 789 exp.UnixToTime: _unix_to_time_sql, 790 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 791 exp.VariancePop: rename_func("VAR_POP"), 792 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 793 exp.Xor: bool_xor_sql, 794 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 795 rename_func("LEVENSHTEIN") 796 ), 797 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 798 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 799 exp.DateBin: rename_func("TIME_BUCKET"), 800 } 801 802 SUPPORTED_JSON_PATH_PARTS = { 803 exp.JSONPathKey, 804 exp.JSONPathRoot, 805 
exp.JSONPathSubscript, 806 exp.JSONPathWildcard, 807 } 808 809 TYPE_MAPPING = { 810 **generator.Generator.TYPE_MAPPING, 811 exp.DataType.Type.BINARY: "BLOB", 812 exp.DataType.Type.BPCHAR: "TEXT", 813 exp.DataType.Type.CHAR: "TEXT", 814 exp.DataType.Type.DATETIME: "TIMESTAMP", 815 exp.DataType.Type.FLOAT: "REAL", 816 exp.DataType.Type.JSONB: "JSON", 817 exp.DataType.Type.NCHAR: "TEXT", 818 exp.DataType.Type.NVARCHAR: "TEXT", 819 exp.DataType.Type.UINT: "UINTEGER", 820 exp.DataType.Type.VARBINARY: "BLOB", 821 exp.DataType.Type.ROWVERSION: "BLOB", 822 exp.DataType.Type.VARCHAR: "TEXT", 823 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 824 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 825 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 826 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 827 } 828 829 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 830 RESERVED_KEYWORDS = { 831 "array", 832 "analyse", 833 "union", 834 "all", 835 "when", 836 "in_p", 837 "default", 838 "create_p", 839 "window", 840 "asymmetric", 841 "to", 842 "else", 843 "localtime", 844 "from", 845 "end_p", 846 "select", 847 "current_date", 848 "foreign", 849 "with", 850 "grant", 851 "session_user", 852 "or", 853 "except", 854 "references", 855 "fetch", 856 "limit", 857 "group_p", 858 "leading", 859 "into", 860 "collate", 861 "offset", 862 "do", 863 "then", 864 "localtimestamp", 865 "check_p", 866 "lateral_p", 867 "current_role", 868 "where", 869 "asc_p", 870 "placing", 871 "desc_p", 872 "user", 873 "unique", 874 "initially", 875 "column", 876 "both", 877 "some", 878 "as", 879 "any", 880 "only", 881 "deferrable", 882 "null_p", 883 "current_time", 884 "true_p", 885 "table", 886 "case", 887 "trailing", 888 "variadic", 889 "for", 890 "on", 891 "distinct", 892 "false_p", 893 "not", 894 "constraint", 895 "current_timestamp", 896 "returning", 897 "primary", 898 "intersect", 899 "having", 900 "analyze", 901 "current_user", 902 "and", 903 "cast", 904 "symmetric", 905 "using", 906 "order", 907 "current_catalog", 908 } 909 910 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 911 912 # DuckDB doesn't generally support CREATE TABLE .. properties 913 # https://duckdb.org/docs/sql/statements/create_table.html 914 PROPERTIES_LOCATION = { 915 prop: exp.Properties.Location.UNSUPPORTED 916 for prop in generator.Generator.PROPERTIES_LOCATION 917 } 918 919 # There are a few exceptions (e.g. 
temporary tables) which are supported or 920 # can be transpiled to DuckDB, so we explicitly override them accordingly 921 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 922 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 923 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 924 925 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 926 exp.FirstValue, 927 exp.Lag, 928 exp.LastValue, 929 exp.Lead, 930 exp.NthValue, 931 ) 932 933 def lambda_sql( 934 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 935 ) -> str: 936 if expression.args.get("colon"): 937 prefix = "LAMBDA " 938 arrow_sep = ":" 939 wrap = False 940 else: 941 prefix = "" 942 943 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 944 return f"{prefix}{lambda_sql}" 945 946 def show_sql(self, expression: exp.Show) -> str: 947 return f"SHOW {expression.name}" 948 949 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 950 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 951 952 def strtotime_sql(self, expression: exp.StrToTime) -> str: 953 if expression.args.get("safe"): 954 formatted_time = self.format_time(expression) 955 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 956 return str_to_time_sql(self, expression) 957 958 def strtodate_sql(self, expression: exp.StrToDate) -> str: 959 if expression.args.get("safe"): 960 formatted_time = self.format_time(expression) 961 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 962 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 963 964 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 965 arg = expression.this 966 if expression.args.get("safe"): 967 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 968 return self.func("JSON", arg) 969 970 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 971 nano = expression.args.get("nano") 972 if nano is not None: 973 expression.set( 974 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 975 ) 976 977 return rename_func("MAKE_TIME")(self, expression) 978 979 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 980 sec = expression.args["sec"] 981 982 milli = expression.args.get("milli") 983 if milli is not None: 984 sec += milli.pop() / exp.Literal.number(1000.0) 985 986 nano = expression.args.get("nano") 987 if nano is not None: 988 sec += nano.pop() / exp.Literal.number(1000000000.0) 989 990 if milli or nano: 991 expression.set("sec", sec) 992 993 return rename_func("MAKE_TIMESTAMP")(self, expression) 994 995 def tablesample_sql( 996 self, 997 expression: exp.TableSample, 998 tablesample_keyword: t.Optional[str] = None, 999 ) -> str: 1000 if not isinstance(expression.parent, exp.Select): 1001 # This sample clause only applies to a single source, not the entire resulting relation 1002 tablesample_keyword = "TABLESAMPLE" 1003 1004 if expression.args.get("size"): 1005 method = expression.args.get("method") 1006 if method and method.name.upper() != "RESERVOIR": 1007 self.unsupported( 1008 f"Sampling method {method} is not supported with a discrete sample count, " 1009 "defaulting to reservoir sampling" 1010 ) 1011 expression.set("method", exp.var("RESERVOIR")) 1012 1013 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 1014 1015 def 
columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
    if isinstance(expression.parent, exp.UserDefinedFunction):
        return self.sql(expression, "this")
    return super().columndef_sql(expression, sep)

def join_sql(self, expression: exp.Join) -> str:
    if (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    ):
        # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        return super().join_sql(expression.on(exp.true()))

    return super().join_sql(expression)

def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
    # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
    if expression.args.get("is_end_exclusive"):
        return rename_func("RANGE")(self, expression)

    return self.function_fallback_sql(expression)

def countif_sql(self, expression: exp.CountIf) -> str:
    if self.dialect.version >= Version("1.2"):
        return self.function_fallback_sql(expression)

    # https://github.com/tobymao/sqlglot/pull/4749
    return count_if_to_sum(self, expression)

def bracket_sql(self, expression: exp.Bracket) -> str:
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            bracket = f"({bracket})[1]"

    return bracket

def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"

def length_sql(self, expression: exp.Length) -> str:
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(
            exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
    )

    return self.sql(case)

def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)

def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)

def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")

def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")

def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)

@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )

@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)

def autoincrementcolumnconstraint_sql(self, _) -> str:
    self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
    return ""

def aliases_sql(self, expression: exp.Aliases) -> str:
    this = expression.this
    if isinstance(this, exp.Posexplode):
        return self.posexplode_sql(this)

    return super().aliases_sql(expression)

def posexplode_sql(self, expression: exp.Posexplode) -> str:
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
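To make the intent of a couple of these overrides concrete, here is a minimal sketch of how they surface through transpilation. The input queries and read dialects are illustrative assumptions; the exact output SQL may vary slightly across sqlglot versions.

import sqlglot

# join_sql: BigQuery permits LEFT JOIN UNNEST(...) without an ON clause,
# so DuckDB output gets a dummy ON (TRUE) appended
print(
    sqlglot.transpile(
        "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a", read="bigquery", write="duckdb"
    )[0]
)

# posexplode_sql: Spark's POSEXPLODE is rewritten to a 0-indexed
# GENERATE_SUBSCRIPTS(...) - 1 AS pos alongside UNNEST(...) AS col
print(
    sqlglot.transpile(
        "SELECT POSEXPLODE(ARRAY(10, 20, 30))", read="spark", write="duckdb"
    )[0]
)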
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True
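A quick illustration of how a few of these options affect generation; a minimal sketch using the public transpile entry point, which forwards these keyword arguments to the Generator (exact formatting may differ between sqlglot versions):

import sqlglot

sql = "select a, b from my_table where a > 1"

# Defaults: single line, identifiers unquoted, function names uppercased
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])

# pretty-print, quote every identifier, and lowercase function names
print(
    sqlglot.transpile(
        sql,
        read="duckdb",
        write="duckdb",
        pretty=True,
        identify=True,
        normalize_functions="lower",
    )[0]
)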
def lambda_sql(
    self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
) -> str:
    if expression.args.get("colon"):
        prefix = "LAMBDA "
        arrow_sep = ":"
        wrap = False
    else:
        prefix = ""

    lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
    return f"{prefix}{lambda_sql}"
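DuckDB accepts both the arrow lambda syntax and a newer Python-style form; the "colon" arg selects the latter. A minimal sketch of both paths, where flipping the flag on the parsed AST is an illustrative shortcut rather than something the parser requires:

import sqlglot
from sqlglot import exp

# Arrow lambdas round-trip with the default "->" separator
print(
    sqlglot.transpile(
        "SELECT LIST_TRANSFORM([1, 2, 3], x -> x + 1)", read="duckdb", write="duckdb"
    )[0]
)

# Setting colon=True on the Lambda node switches to the LAMBDA x: ... rendering
ast = sqlglot.parse_one("SELECT LIST_TRANSFORM([1, 2, 3], x -> x + 1)", read="duckdb")
ast.find(exp.Lambda).set("colon", True)
print(ast.sql("duckdb"))  # roughly: ... LAMBDA x: x + 1 ...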
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
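The safe branch is typically reached when transpiling lenient parsers such as Snowflake's TRY_TO_DATE; the read dialect and mapping here are an assumption about how the source dialect builds StrToDate:

import sqlglot

# safe=True routes through TRY_STRPTIME, so unparseable strings yield NULL instead of an error
print(
    sqlglot.transpile(
        "SELECT TRY_TO_DATE('05-01-2024', 'MM-DD-YYYY')", read="snowflake", write="duckdb"
    )[0]
)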
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    nano = expression.args.get("nano")
    if nano is not None:
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
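Since DuckDB's MAKE_TIME has no nanoseconds parameter, the nano argument is folded into the seconds term. A sketch assuming a source dialect that parses a trailing nanoseconds argument, e.g. Snowflake's TIME_FROM_PARTS:

import sqlglot

# The 4th (nano) argument becomes sec + nano / 1e9 before renaming to MAKE_TIME
print(
    sqlglot.transpile(
        "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", read="snowflake", write="duckdb"
    )[0]
)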
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    sec = expression.args["sec"]

    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
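The same folding applies to sub-second arguments here: milliseconds and nanoseconds are both merged into the seconds operand because MAKE_TIMESTAMP only accepts fractional seconds. An illustrative sketch (Snowflake's 7-argument TIMESTAMP_FROM_PARTS is the assumed source):

import sqlglot

# milli/nano collapse into sec, then the call is renamed to MAKE_TIMESTAMP
print(
    sqlglot.transpile(
        "SELECT TIMESTAMP_FROM_PARTS(2024, 5, 1, 12, 34, 56, 500000000)",
        read="snowflake",
        write="duckdb",
    )[0]
)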
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
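In DuckDB, only reservoir sampling guarantees an exact row count, so a discrete sample size combined with another method is coerced to RESERVOIR and a warning is emitted. A hedged sketch, assuming the reading dialect parses a fixed-size sample with an explicit method:

import sqlglot

# BERNOULLI with a row count is not supported by DuckDB; the method is
# rewritten to RESERVOIR and an unsupported-warning is recorded
print(
    sqlglot.transpile(
        "SELECT * FROM t TABLESAMPLE BERNOULLI (10 ROWS)", read="snowflake", write="duckdb"
    )[0]
)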
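Finally, note that countif_sql and bracket_sql above are gated on the target engine version. A minimal sketch of how that surfaces, under the assumption that your sqlglot build accepts the comma-separated "version" dialect setting:

import sqlglot

sql = "SELECT COUNT_IF(x > 0) FROM t"

# Recent DuckDB (>= 1.2, the default) keeps COUNT_IF as-is
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])

# Targeting an older engine rewrites it to a SUM-based equivalent
# (the "version" setting here is an assumption about the installed sqlglot)
print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.1")[0])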
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql