# sqlglot.dialects.duckdb — DuckDB dialect definitions (tokenizer, parser, generator).
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 JSON_EXTRACT_TYPE, 10 NormalizationStrategy, 11 Version, 12 approx_count_distinct_sql, 13 arrow_json_extract_sql, 14 binary_from_function, 15 bool_xor_sql, 16 build_default_decimal_type, 17 count_if_to_sum, 18 date_delta_to_binary_interval_op, 19 date_trunc_to_time, 20 datestrtodate_sql, 21 no_datetime_sql, 22 encode_decode_sql, 23 build_formatted_time, 24 inline_array_unless_query, 25 no_comment_column_constraint_sql, 26 no_time_sql, 27 no_timestamp_sql, 28 pivot_column_names, 29 rename_func, 30 remove_from_array_using_filter, 31 strposition_sql, 32 str_to_time_sql, 33 timestamptrunc_sql, 34 timestrtotime_sql, 35 unit_to_str, 36 sha256_sql, 37 build_regexp_extract, 38 explode_to_unnest_sql, 39 no_make_interval_sql, 40 groupconcat_sql, 41) 42from sqlglot.generator import unsupported_args 43from sqlglot.helper import seq_get 44from sqlglot.tokens import TokenType 45from sqlglot.parser import binary_range_parser 46 47 48# BigQuery -> DuckDB conversion for the DATE function 49def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str: 50 result = f"CAST({self.sql(expression, 'this')} AS DATE)" 51 zone = self.sql(expression, "zone") 52 53 if zone: 54 date_str = self.func("STRFTIME", result, "'%d/%m/%Y'") 55 date_str = f"{date_str} || ' ' || {zone}" 56 57 # This will create a TIMESTAMP with time zone information 58 result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'") 59 60 return result 61 62 63# BigQuery -> DuckDB conversion for the TIME_DIFF function 64def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: 65 this = exp.cast(expression.this, exp.DataType.Type.TIME) 66 expr = exp.cast(expression.expression, exp.DataType.Type.TIME) 67 68 # Although the 2 dialects share similar signatures, BQ seems to inverse 69 # the sign of the result so the start/end time 
operands are flipped 70 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 71 72 73@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator.")) 74def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str: 75 return self.func("ARRAY_SORT", expression.this) 76 77 78def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str: 79 name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT" 80 return self.func(name, expression.this) 81 82 83def _build_sort_array_desc(args: t.List) -> exp.Expression: 84 return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) 85 86 87def _build_date_diff(args: t.List) -> exp.Expression: 88 return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 89 90 91def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]: 92 def _builder(args: t.List) -> exp.GenerateSeries: 93 # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions 94 if len(args) == 1: 95 # DuckDB uses 0 as a default for the series' start when it's omitted 96 args.insert(0, exp.Literal.number("0")) 97 98 gen_series = exp.GenerateSeries.from_arg_list(args) 99 gen_series.set("is_end_exclusive", end_exclusive) 100 101 return gen_series 102 103 return _builder 104 105 106def _build_make_timestamp(args: t.List) -> exp.Expression: 107 if len(args) == 1: 108 return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS) 109 110 return exp.TimestampFromParts( 111 year=seq_get(args, 0), 112 month=seq_get(args, 1), 113 day=seq_get(args, 2), 114 hour=seq_get(args, 3), 115 min=seq_get(args, 4), 116 sec=seq_get(args, 5), 117 ) 118 119 120def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]: 121 def _parse(self: DuckDB.Parser) -> exp.Show: 122 return self._parse_show_duckdb(*args, **kwargs) 123 124 return _parse 125 126 127def 
_struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: 128 args: t.List[str] = [] 129 130 # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is 131 # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB 132 # The transformation to ROW will take place if: 133 # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would 134 # 2. A cast to STRUCT / ARRAY of STRUCTs is found 135 ancestor_cast = expression.find_ancestor(exp.Cast) 136 is_bq_inline_struct = ( 137 (expression.find(exp.PropertyEQ) is None) 138 and ancestor_cast 139 and any( 140 casted_type.is_type(exp.DataType.Type.STRUCT) 141 for casted_type in ancestor_cast.find_all(exp.DataType) 142 ) 143 ) 144 145 for i, expr in enumerate(expression.expressions): 146 is_property_eq = isinstance(expr, exp.PropertyEQ) 147 value = expr.expression if is_property_eq else expr 148 149 if is_bq_inline_struct: 150 args.append(self.sql(value)) 151 else: 152 if is_property_eq: 153 if isinstance(expr.this, exp.Identifier): 154 key = self.sql(exp.Literal.string(expr.name)) 155 else: 156 key = self.sql(expr.this) 157 else: 158 key = self.sql(exp.Literal.string(f"_{i}")) 159 160 args.append(f"{key}: {self.sql(value)}") 161 162 csv_args = ", ".join(args) 163 164 return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}" 165 166 167def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str: 168 if expression.is_type("array"): 169 return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]" 170 171 # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE 172 if expression.is_type( 173 exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ 174 ): 175 return expression.this.value 176 177 return self.datatype_sql(expression) 178 179 180def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str: 181 sql 
= self.func("TO_JSON", expression.this, expression.args.get("options")) 182 return f"CAST({sql} AS TEXT)" 183 184 185def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str: 186 scale = expression.args.get("scale") 187 timestamp = expression.this 188 189 if scale in (None, exp.UnixToTime.SECONDS): 190 return self.func("TO_TIMESTAMP", timestamp) 191 if scale == exp.UnixToTime.MILLIS: 192 return self.func("EPOCH_MS", timestamp) 193 if scale == exp.UnixToTime.MICROS: 194 return self.func("MAKE_TIMESTAMP", timestamp) 195 196 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 197 198 199WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 200 201 202def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 203 arrow_sql = arrow_json_extract_sql(self, expression) 204 if not expression.same_parent and isinstance( 205 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 206 ): 207 arrow_sql = self.wrap(arrow_sql) 208 return arrow_sql 209 210 211def _implicit_datetime_cast( 212 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 213) -> t.Optional[exp.Expression]: 214 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 215 216 217def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 218 this = _implicit_datetime_cast(expression.this) 219 expr = _implicit_datetime_cast(expression.expression) 220 221 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 222 223 224def _generate_datetime_array_sql( 225 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 226) -> str: 227 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 228 229 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 230 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 231 end = 
_implicit_datetime_cast(expression.args.get("end"), type=type) 232 233 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 234 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 235 start=start, end=end, step=expression.args.get("step") 236 ) 237 238 if is_generate_date_array: 239 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 240 # GENERATE_DATE_ARRAY we must cast it back to DATE array 241 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 242 243 return self.sql(gen_series) 244 245 246def _json_extract_value_array_sql( 247 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 248) -> str: 249 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 250 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 251 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 252 253 254class DuckDB(Dialect): 255 NULL_ORDERING = "nulls_are_last" 256 SUPPORTS_USER_DEFINED_TYPES = True 257 SAFE_DIVISION = True 258 INDEX_OFFSET = 1 259 CONCAT_COALESCE = True 260 SUPPORTS_ORDER_BY_ALL = True 261 SUPPORTS_FIXED_SIZE_ARRAYS = True 262 STRICT_JSON_PATH_SYNTAX = False 263 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 264 265 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 266 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 267 268 DATE_PART_MAPPING = { 269 **Dialect.DATE_PART_MAPPING, 270 "DAYOFWEEKISO": "ISODOW", 271 } 272 DATE_PART_MAPPING.pop("WEEKDAY") 273 274 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 275 if isinstance(path, exp.Literal): 276 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 277 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 
278 # This check ensures we'll avoid trying to parse these as JSON paths, which can 279 # either result in a noisy warning or in an invalid representation of the path. 280 path_text = path.name 281 if path_text.startswith("/") or "[#" in path_text: 282 return path 283 284 return super().to_json_path(path) 285 286 class Tokenizer(tokens.Tokenizer): 287 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 288 HEREDOC_STRINGS = ["$"] 289 290 HEREDOC_TAG_IS_IDENTIFIER = True 291 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 292 293 KEYWORDS = { 294 **tokens.Tokenizer.KEYWORDS, 295 "//": TokenType.DIV, 296 "**": TokenType.DSTAR, 297 "^@": TokenType.CARET_AT, 298 "@>": TokenType.AT_GT, 299 "<@": TokenType.LT_AT, 300 "ATTACH": TokenType.ATTACH, 301 "BINARY": TokenType.VARBINARY, 302 "BITSTRING": TokenType.BIT, 303 "BPCHAR": TokenType.TEXT, 304 "CHAR": TokenType.TEXT, 305 "DATETIME": TokenType.TIMESTAMPNTZ, 306 "DETACH": TokenType.DETACH, 307 "EXCLUDE": TokenType.EXCEPT, 308 "LOGICAL": TokenType.BOOLEAN, 309 "ONLY": TokenType.ONLY, 310 "PIVOT_WIDER": TokenType.PIVOT, 311 "POSITIONAL": TokenType.POSITIONAL, 312 "RESET": TokenType.COMMAND, 313 "SIGNED": TokenType.INT, 314 "STRING": TokenType.TEXT, 315 "SUMMARIZE": TokenType.SUMMARIZE, 316 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 317 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 318 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 319 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 320 "TIMESTAMP_US": TokenType.TIMESTAMP, 321 "UBIGINT": TokenType.UBIGINT, 322 "UINTEGER": TokenType.UINT, 323 "USMALLINT": TokenType.USMALLINT, 324 "UTINYINT": TokenType.UTINYINT, 325 "VARCHAR": TokenType.TEXT, 326 } 327 KEYWORDS.pop("/*+") 328 329 SINGLE_TOKENS = { 330 **tokens.Tokenizer.SINGLE_TOKENS, 331 "$": TokenType.PARAMETER, 332 } 333 334 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 335 336 class Parser(parser.Parser): 337 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True 338 339 BITWISE = { 340 **parser.Parser.BITWISE, 341 TokenType.TILDA: exp.RegexpLike, 342 } 343 
BITWISE.pop(TokenType.CARET) 344 345 RANGE_PARSERS = { 346 **parser.Parser.RANGE_PARSERS, 347 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 348 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 349 } 350 351 EXPONENT = { 352 **parser.Parser.EXPONENT, 353 TokenType.CARET: exp.Pow, 354 TokenType.DSTAR: exp.Pow, 355 } 356 357 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 358 359 SHOW_PARSERS = { 360 "TABLES": _show_parser("TABLES"), 361 "ALL TABLES": _show_parser("ALL TABLES"), 362 } 363 364 FUNCTIONS = { 365 **parser.Parser.FUNCTIONS, 366 "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), 367 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 368 "ARRAY_SORT": exp.SortArray.from_arg_list, 369 "DATEDIFF": _build_date_diff, 370 "DATE_DIFF": _build_date_diff, 371 "DATE_TRUNC": date_trunc_to_time, 372 "DATETRUNC": date_trunc_to_time, 373 "DECODE": lambda args: exp.Decode( 374 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 375 ), 376 "EDITDIST3": exp.Levenshtein.from_arg_list, 377 "ENCODE": lambda args: exp.Encode( 378 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 379 ), 380 "EPOCH": exp.TimeToUnix.from_arg_list, 381 "EPOCH_MS": lambda args: exp.UnixToTime( 382 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 383 ), 384 "GENERATE_SERIES": _build_generate_series(), 385 "JSON": exp.ParseJSON.from_arg_list, 386 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 387 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 388 "LIST_CONTAINS": exp.ArrayContains.from_arg_list, 389 "LIST_FILTER": exp.ArrayFilter.from_arg_list, 390 "LIST_HAS": exp.ArrayContains.from_arg_list, 391 "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, 392 "LIST_REVERSE_SORT": _build_sort_array_desc, 393 "LIST_SORT": exp.SortArray.from_arg_list, 394 "LIST_TRANSFORM": exp.Transform.from_arg_list, 395 "LIST_VALUE": lambda args: 
exp.Array(expressions=args), 396 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 397 "MAKE_TIMESTAMP": _build_make_timestamp, 398 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 399 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 400 "RANGE": _build_generate_series(end_exclusive=True), 401 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 402 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 403 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 404 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 405 this=seq_get(args, 0), 406 expression=seq_get(args, 1), 407 replacement=seq_get(args, 2), 408 modifiers=seq_get(args, 3), 409 ), 410 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 411 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 412 "STRING_SPLIT": exp.Split.from_arg_list, 413 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 414 "STRING_TO_ARRAY": exp.Split.from_arg_list, 415 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 416 "STRUCT_PACK": exp.Struct.from_arg_list, 417 "STR_SPLIT": exp.Split.from_arg_list, 418 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 419 "TIME_BUCKET": exp.DateBin.from_arg_list, 420 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 421 "UNNEST": exp.Explode.from_arg_list, 422 "XOR": binary_from_function(exp.BitwiseXor), 423 } 424 425 FUNCTIONS.pop("DATE_SUB") 426 FUNCTIONS.pop("GLOB") 427 428 FUNCTION_PARSERS = { 429 **parser.Parser.FUNCTION_PARSERS, 430 **dict.fromkeys( 431 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 432 ), 433 } 434 FUNCTION_PARSERS.pop("DECODE") 435 436 NO_PAREN_FUNCTION_PARSERS = { 437 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 438 "MAP": lambda self: self._parse_map(), 439 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 440 } 441 442 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 443 TokenType.SEMI, 444 TokenType.ANTI, 445 } 446 447 PLACEHOLDER_PARSERS = { 448 
**parser.Parser.PLACEHOLDER_PARSERS, 449 TokenType.PARAMETER: lambda self: ( 450 self.expression(exp.Placeholder, this=self._prev.text) 451 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 452 else None 453 ), 454 } 455 456 TYPE_CONVERTERS = { 457 # https://duckdb.org/docs/sql/data_types/numeric 458 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 459 # https://duckdb.org/docs/sql/data_types/text 460 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 461 } 462 463 STATEMENT_PARSERS = { 464 **parser.Parser.STATEMENT_PARSERS, 465 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 466 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 467 TokenType.SHOW: lambda self: self._parse_show(), 468 } 469 470 SET_PARSERS = { 471 **parser.Parser.SET_PARSERS, 472 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 473 } 474 475 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 476 index = self._index 477 if not self._match_text_seq("LAMBDA"): 478 return super()._parse_lambda(alias=alias) 479 480 expressions = self._parse_csv(self._parse_lambda_arg) 481 if not self._match(TokenType.COLON): 482 self._retreat(index) 483 return None 484 485 this = self._replace_lambda(self._parse_assignment(), expressions) 486 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 487 488 def _parse_expression(self) -> t.Optional[exp.Expression]: 489 # DuckDB supports prefix aliases, e.g. 
foo: 1 490 if self._next and self._next.token_type == TokenType.COLON: 491 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 492 self._match(TokenType.COLON) 493 comments = self._prev_comments or [] 494 495 this = self._parse_assignment() 496 if isinstance(this, exp.Expression): 497 # Moves the comment next to the alias in `alias: expr /* comment */` 498 comments += this.pop_comments() or [] 499 500 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 501 502 return super()._parse_expression() 503 504 def _parse_table( 505 self, 506 schema: bool = False, 507 joins: bool = False, 508 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 509 parse_bracket: bool = False, 510 is_db_reference: bool = False, 511 parse_partition: bool = False, 512 consume_pipe: bool = False, 513 ) -> t.Optional[exp.Expression]: 514 # DuckDB supports prefix aliases, e.g. FROM foo: bar 515 if self._next and self._next.token_type == TokenType.COLON: 516 alias = self._parse_table_alias( 517 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 518 ) 519 self._match(TokenType.COLON) 520 comments = self._prev_comments or [] 521 else: 522 alias = None 523 comments = [] 524 525 table = super()._parse_table( 526 schema=schema, 527 joins=joins, 528 alias_tokens=alias_tokens, 529 parse_bracket=parse_bracket, 530 is_db_reference=is_db_reference, 531 parse_partition=parse_partition, 532 ) 533 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 534 # Moves the comment next to the alias in `alias: table /* comment */` 535 comments += table.pop_comments() or [] 536 alias.comments = alias.pop_comments() + comments 537 table.set("alias", alias) 538 539 return table 540 541 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 542 # https://duckdb.org/docs/sql/samples.html 543 sample = super()._parse_table_sample(as_modifier=as_modifier) 544 if sample and not sample.args.get("method"): 545 if sample.args.get("size"): 
546 sample.set("method", exp.var("RESERVOIR")) 547 else: 548 sample.set("method", exp.var("SYSTEM")) 549 550 return sample 551 552 def _parse_bracket( 553 self, this: t.Optional[exp.Expression] = None 554 ) -> t.Optional[exp.Expression]: 555 bracket = super()._parse_bracket(this) 556 557 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 558 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 559 bracket.set("returns_list_for_maps", True) 560 561 return bracket 562 563 def _parse_map(self) -> exp.ToMap | exp.Map: 564 if self._match(TokenType.L_BRACE, advance=False): 565 return self.expression(exp.ToMap, this=self._parse_bracket()) 566 567 args = self._parse_wrapped_csv(self._parse_assignment) 568 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 569 570 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 571 return self._parse_field_def() 572 573 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 574 if len(aggregations) == 1: 575 return super()._pivot_column_names(aggregations) 576 return pivot_column_names(aggregations, dialect="duckdb") 577 578 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 579 def _parse_attach_option() -> exp.AttachOption: 580 return self.expression( 581 exp.AttachOption, 582 this=self._parse_var(any_token=True), 583 expression=self._parse_field(any_token=True), 584 ) 585 586 self._match(TokenType.DATABASE) 587 exists = self._parse_exists(not_=is_attach) 588 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 589 590 if self._match(TokenType.L_PAREN, advance=False): 591 expressions = self._parse_wrapped_csv(_parse_attach_option) 592 else: 593 expressions = None 594 595 return ( 596 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 597 if is_attach 598 else self.expression(exp.Detach, this=this, exists=exists) 599 ) 
600 601 def _parse_show_duckdb(self, this: str) -> exp.Show: 602 return self.expression(exp.Show, this=this) 603 604 def _parse_primary(self) -> t.Optional[exp.Expression]: 605 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 606 return exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 607 608 return super()._parse_primary() 609 610 class Generator(generator.Generator): 611 PARAMETER_TOKEN = "$" 612 NAMED_PLACEHOLDER_TOKEN = "$" 613 JOIN_HINTS = False 614 TABLE_HINTS = False 615 QUERY_HINTS = False 616 LIMIT_FETCH = "LIMIT" 617 STRUCT_DELIMITER = ("(", ")") 618 RENAME_TABLE_WITH_DB = False 619 NVL2_SUPPORTED = False 620 SEMI_ANTI_JOIN_WITH_SIDE = False 621 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 622 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 623 LAST_DAY_SUPPORTS_DATE_PART = False 624 JSON_KEY_VALUE_PAIR_SEP = "," 625 IGNORE_NULLS_IN_FUNC = True 626 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 627 SUPPORTS_CREATE_TABLE_LIKE = False 628 MULTI_ARG_DISTINCT = False 629 CAN_IMPLEMENT_ARRAY_ANY = True 630 SUPPORTS_TO_NUMBER = False 631 SUPPORTS_WINDOW_EXCLUDE = True 632 COPY_HAS_INTO_KEYWORD = False 633 STAR_EXCEPT = "EXCLUDE" 634 PAD_FILL_PATTERN_IS_REQUIRED = True 635 ARRAY_CONCAT_IS_VAR_LEN = False 636 ARRAY_SIZE_DIM_REQUIRED = False 637 NORMALIZE_EXTRACT_DATE_PARTS = True 638 SUPPORTS_LIKE_QUANTIFIERS = False 639 640 TRANSFORMS = { 641 **generator.Generator.TRANSFORMS, 642 exp.ApproxDistinct: approx_count_distinct_sql, 643 exp.Array: inline_array_unless_query, 644 exp.ArrayFilter: rename_func("LIST_FILTER"), 645 exp.ArrayRemove: remove_from_array_using_filter, 646 exp.ArraySort: _array_sort_sql, 647 exp.ArraySum: rename_func("LIST_SUM"), 648 exp.ArrayUniqueAgg: lambda self, e: self.func( 649 "LIST", exp.Distinct(expressions=[e.this]) 650 ), 651 exp.BitwiseXor: rename_func("XOR"), 652 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 653 exp.CurrentDate: lambda *_: "CURRENT_DATE", 654 exp.CurrentTime: lambda *_: "CURRENT_TIME", 655 
exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 656 exp.DayOfMonth: rename_func("DAYOFMONTH"), 657 exp.DayOfWeek: rename_func("DAYOFWEEK"), 658 exp.DayOfWeekIso: rename_func("ISODOW"), 659 exp.DayOfYear: rename_func("DAYOFYEAR"), 660 exp.DataType: _datatype_sql, 661 exp.Date: _date_sql, 662 exp.DateAdd: date_delta_to_binary_interval_op(), 663 exp.DateFromParts: rename_func("MAKE_DATE"), 664 exp.DateSub: date_delta_to_binary_interval_op(), 665 exp.DateDiff: _date_diff_sql, 666 exp.DateStrToDate: datestrtodate_sql, 667 exp.Datetime: no_datetime_sql, 668 exp.DatetimeSub: date_delta_to_binary_interval_op(), 669 exp.DatetimeAdd: date_delta_to_binary_interval_op(), 670 exp.DateToDi: lambda self, 671 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 672 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 673 exp.DiToDate: lambda self, 674 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 675 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 676 exp.GenerateDateArray: _generate_datetime_array_sql, 677 exp.GenerateTimestampArray: _generate_datetime_array_sql, 678 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 679 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 680 exp.Explode: rename_func("UNNEST"), 681 exp.IntDiv: lambda self, e: self.binary(e, "//"), 682 exp.IsInf: rename_func("ISINF"), 683 exp.IsNan: rename_func("ISNAN"), 684 exp.JSONBExists: rename_func("JSON_EXISTS"), 685 exp.JSONExtract: _arrow_json_extract_sql, 686 exp.JSONExtractArray: _json_extract_value_array_sql, 687 exp.JSONExtractScalar: _arrow_json_extract_sql, 688 exp.JSONFormat: _json_format_sql, 689 exp.JSONValueArray: _json_extract_value_array_sql, 690 exp.Lateral: explode_to_unnest_sql, 691 exp.LogicalOr: rename_func("BOOL_OR"), 692 exp.LogicalAnd: rename_func("BOOL_AND"), 693 exp.MakeInterval: lambda 
self, e: no_make_interval_sql(self, e, sep=" "), 694 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 695 exp.MonthsBetween: lambda self, e: self.func( 696 "DATEDIFF", 697 "'month'", 698 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 699 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 700 ), 701 exp.PercentileCont: rename_func("QUANTILE_CONT"), 702 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 703 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 704 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 705 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 706 exp.RegexpReplace: lambda self, e: self.func( 707 "REGEXP_REPLACE", 708 e.this, 709 e.expression, 710 e.args.get("replacement"), 711 e.args.get("modifiers"), 712 ), 713 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 714 exp.RegexpILike: lambda self, e: self.func( 715 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 716 ), 717 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 718 exp.Return: lambda self, e: self.sql(e, "this"), 719 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 720 exp.Rand: rename_func("RANDOM"), 721 exp.SHA: rename_func("SHA1"), 722 exp.SHA2: sha256_sql, 723 exp.Split: rename_func("STR_SPLIT"), 724 exp.SortArray: _sort_array_sql, 725 exp.StrPosition: strposition_sql, 726 exp.StrToUnix: lambda self, e: self.func( 727 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 728 ), 729 exp.Struct: _struct_sql, 730 exp.Transform: rename_func("LIST_TRANSFORM"), 731 exp.TimeAdd: date_delta_to_binary_interval_op(), 732 exp.Time: no_time_sql, 733 exp.TimeDiff: _timediff_sql, 734 exp.Timestamp: no_timestamp_sql, 735 exp.TimestampDiff: lambda self, e: self.func( 736 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 737 ), 738 exp.TimestampTrunc: 
timestamptrunc_sql(), 739 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 740 exp.TimeStrToTime: timestrtotime_sql, 741 exp.TimeStrToUnix: lambda self, e: self.func( 742 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 743 ), 744 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 745 exp.TimeToUnix: rename_func("EPOCH"), 746 exp.TsOrDiToDi: lambda self, 747 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 748 exp.TsOrDsAdd: date_delta_to_binary_interval_op(), 749 exp.TsOrDsDiff: lambda self, e: self.func( 750 "DATE_DIFF", 751 f"'{e.args.get('unit') or 'DAY'}'", 752 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 753 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 754 ), 755 exp.UnixToStr: lambda self, e: self.func( 756 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 757 ), 758 exp.DatetimeTrunc: lambda self, e: self.func( 759 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 760 ), 761 exp.UnixToTime: _unix_to_time_sql, 762 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 763 exp.VariancePop: rename_func("VAR_POP"), 764 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 765 exp.Xor: bool_xor_sql, 766 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 767 rename_func("LEVENSHTEIN") 768 ), 769 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 770 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 771 exp.DateBin: rename_func("TIME_BUCKET"), 772 } 773 774 SUPPORTED_JSON_PATH_PARTS = { 775 exp.JSONPathKey, 776 exp.JSONPathRoot, 777 exp.JSONPathSubscript, 778 exp.JSONPathWildcard, 779 } 780 781 TYPE_MAPPING = { 782 **generator.Generator.TYPE_MAPPING, 783 exp.DataType.Type.BINARY: "BLOB", 784 exp.DataType.Type.BPCHAR: "TEXT", 785 exp.DataType.Type.CHAR: "TEXT", 786 exp.DataType.Type.DATETIME: "TIMESTAMP", 787 exp.DataType.Type.FLOAT: "REAL", 
788 exp.DataType.Type.JSONB: "JSON", 789 exp.DataType.Type.NCHAR: "TEXT", 790 exp.DataType.Type.NVARCHAR: "TEXT", 791 exp.DataType.Type.UINT: "UINTEGER", 792 exp.DataType.Type.VARBINARY: "BLOB", 793 exp.DataType.Type.ROWVERSION: "BLOB", 794 exp.DataType.Type.VARCHAR: "TEXT", 795 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 796 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 797 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 798 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 799 } 800 801 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 802 RESERVED_KEYWORDS = { 803 "array", 804 "analyse", 805 "union", 806 "all", 807 "when", 808 "in_p", 809 "default", 810 "create_p", 811 "window", 812 "asymmetric", 813 "to", 814 "else", 815 "localtime", 816 "from", 817 "end_p", 818 "select", 819 "current_date", 820 "foreign", 821 "with", 822 "grant", 823 "session_user", 824 "or", 825 "except", 826 "references", 827 "fetch", 828 "limit", 829 "group_p", 830 "leading", 831 "into", 832 "collate", 833 "offset", 834 "do", 835 "then", 836 "localtimestamp", 837 "check_p", 838 "lateral_p", 839 "current_role", 840 "where", 841 "asc_p", 842 "placing", 843 "desc_p", 844 "user", 845 "unique", 846 "initially", 847 "column", 848 "both", 849 "some", 850 "as", 851 "any", 852 "only", 853 "deferrable", 854 "null_p", 855 "current_time", 856 "true_p", 857 "table", 858 "case", 859 "trailing", 860 "variadic", 861 "for", 862 "on", 863 "distinct", 864 "false_p", 865 "not", 866 "constraint", 867 "current_timestamp", 868 "returning", 869 "primary", 870 "intersect", 871 "having", 872 "analyze", 873 "current_user", 874 "and", 875 "cast", 876 "symmetric", 877 "using", 878 "order", 879 "current_catalog", 880 } 881 882 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 883 884 # DuckDB doesn't generally support CREATE TABLE .. 
        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        # General-purpose window functions for which DuckDB accepts the
        # IGNORE NULLS / RESPECT NULLS modifiers
        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            """Render a lambda, preserving DuckDB's `LAMBDA x: ...` form when the
            node was parsed with the `colon` flag (see Parser._parse_lambda)."""
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render SHOW statements, e.g. SHOW TABLES."""
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            """ISO-8601 parsing maps to a plain cast to TIMESTAMPTZ in DuckDB."""
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            """STR_TO_TIME; the `safe` variant uses TRY_STRPTIME so parse failures
            yield NULL instead of raising."""
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            """STR_TO_DATE; like strtotime_sql but the result is cast to DATE."""
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            """PARSE_JSON; the `safe` variant returns NULL for invalid JSON via a
            JSON_VALID guard instead of calling JSON() directly."""
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            """TIME_FROM_PARTS -> MAKE_TIME; DuckDB has no nanosecond argument, so
            nanos are folded into the seconds argument as a fraction."""
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """TIMESTAMP_FROM_PARTS -> MAKE_TIMESTAMP; milli/nano components are
            folded into the fractional seconds argument."""
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            """Render a sample clause; a discrete row-count sample forces the
            RESERVOIR method, since other methods only support percentages."""
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            """UDF parameters are rendered as bare names (no type), unlike table columns."""
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            """COUNT_IF exists natively from DuckDB 1.2; older targets get a
            SUM(CASE ...) rewrite."""
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Subscript access; pre-1.2 DuckDB wraps array literals in parens and
            unwraps the single-element list that map subscripts used to return."""
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            """Render WITHIN GROUP by splicing the ORDER BY into the function call,
            matching DuckDB's ordered-set aggregate argument layout."""
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            """LENGTH with optional binary semantics: OCTET_LENGTH for blobs,
            LENGTH for text, with a runtime TYPEOF dispatch when the type is unknown."""
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            """Snowflake OBJECT_INSERT -> STRUCT_INSERT / STRUCT_PACK."""
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """UNNEST; BigQuery's exploding UNNEST becomes a subquery projecting
            UNNEST(..., max_depth => 2) so structs are flattened too."""
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if isinstance(alias, exp.TableAlias):
                    expression.set("alias", None)
                    if alias.columns:
                        alias = exp.TableAlias(this=seq_get(alias.columns, 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            """IGNORE NULLS is kept for window functions; FIRST(...) is mapped to
            ANY_VALUE(...), and other wrapped functions drop the modifier with a warning."""
            this = expression.this

            if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            if isinstance(this, exp.First):
                this = exp.AnyValue(this=this.this)

            if not isinstance(this, exp.AnyValue):
                self.unsupported("IGNORE NULLS is not supported for non-window functions.")

            return self.sql(this)

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            """RESPECT NULLS is kept only for window functions; otherwise dropped
            with a warning."""
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            """ARRAY_TO_STRING; a NULL-replacement argument is emulated with
            LIST_TRANSFORM + COALESCE since DuckDB's function lacks that parameter."""
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            """REGEXP_EXTRACT; the group argument is elided when it equals the
            dialect default and no parameters argument follows it."""
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            """TO_CHAR-style number formatting via DuckDB's fmt-based FORMAT;
            only integer precision formats can be expressed."""
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            """AUTOINCREMENT has no DuckDB equivalent; warn and drop it."""
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            """Route aliased POSEXPLODE to posexplode_sql so it can consume the aliases."""
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            """Spark POSEXPLODE -> GENERATE_SUBSCRIPTS(...) - 1 paired with UNNEST."""
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql

        def addmonths_sql(self, expression: exp.AddMonths) -> str:
            """ADD_MONTHS -> DATE_ADD(.., INTERVAL .. MONTH), casting the result
            back so the input's DATE/TIMESTAMPTZ type is preserved."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

            func = self.func(
                "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
            )

            # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
            # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
            # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
            # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
            if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
                return self.sql(exp.Cast(this=func, to=this.type))

            return self.sql(func)
class DuckDB(Dialect):
    """DuckDB dialect: tokenizer, parser and (below) generator rules."""

    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Convert a raw path literal to a JSONPath node, leaving DuckDB's
        JSON-pointer and back-of-list syntaxes untouched."""
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        # DuckDB has no optimizer hints, so `/*+` is just a regular comment opener
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        # SHOW is parsed as a real statement here, not an opaque command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_FILTER": exp.ArrayFilter.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_TRANSFORM": exp.Transform.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            """Parse DuckDB's Python-style `LAMBDA args: body`, falling back to the
            default `->` syntax; backtracks if the colon is missing."""
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            # NOTE(review): `consume_pipe` is accepted but not forwarded to super() —
            # looks intentional to keep the signature compatible, but confirm upstream
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            """Parse subscripts, tagging pre-1.2 map subscripts which returned lists."""
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            """Parse both MAP {..} (brace literal) and MAP([keys], [values])."""
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            """Single aggregations keep the default naming; multiple ones get
            DuckDB-style generated names."""
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach: bool = True) -> exp.Attach | exp.Detach:
            """Parse ATTACH/DETACH [DATABASE] [IF (NOT) EXISTS] name [AS alias] [(options)]."""

            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )
        def _parse_show_duckdb(self, this: str) -> exp.Show:
            """Build the Show node for the statements registered in SHOW_PARSERS."""
            return self.expression(exp.Show, this=this)

        def _parse_primary(self) -> t.Optional[exp.Expression]:
            """Parse DuckDB positional column references of the form `#N`."""
            if self._match_pair(TokenType.HASH, TokenType.NUMBER):
                return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

            return super()._parse_primary()

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True
        SUPPORTS_LIKE_QUANTIFIERS = False

        # Per-expression SQL generation overrides; keys are AST node classes
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.ArrayUniqueAgg: lambda self, e: self.func(
                "LIST", exp.Distinct(expressions=[e.this])
            ),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: date_delta_to_binary_interval_op(),
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: date_delta_to_binary_interval_op(),
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: date_delta_to_binary_interval_op(),
            exp.DatetimeAdd: date_delta_to_binary_interval_op(),
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: date_delta_to_binary_interval_op(),
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: date_delta_to_binary_interval_op(),
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        # General-purpose window functions for which DuckDB accepts the
        # IGNORE NULLS / RESPECT NULLS modifiers
        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            """Render a lambda, preserving DuckDB's `LAMBDA x: ...` form when the
            node was parsed with the `colon` flag (see Parser._parse_lambda)."""
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render SHOW statements, e.g. SHOW TABLES."""
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            """ISO-8601 parsing maps to a plain cast to TIMESTAMPTZ in DuckDB."""
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            """STR_TO_TIME; the `safe` variant uses TRY_STRPTIME so parse failures
            yield NULL instead of raising."""
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            """STR_TO_DATE; like strtotime_sql but the result is cast to DATE."""
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
936 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 937 938 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 939 arg = expression.this 940 if expression.args.get("safe"): 941 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 942 return self.func("JSON", arg) 943 944 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 945 nano = expression.args.get("nano") 946 if nano is not None: 947 expression.set( 948 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 949 ) 950 951 return rename_func("MAKE_TIME")(self, expression) 952 953 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 954 sec = expression.args["sec"] 955 956 milli = expression.args.get("milli") 957 if milli is not None: 958 sec += milli.pop() / exp.Literal.number(1000.0) 959 960 nano = expression.args.get("nano") 961 if nano is not None: 962 sec += nano.pop() / exp.Literal.number(1000000000.0) 963 964 if milli or nano: 965 expression.set("sec", sec) 966 967 return rename_func("MAKE_TIMESTAMP")(self, expression) 968 969 def tablesample_sql( 970 self, 971 expression: exp.TableSample, 972 tablesample_keyword: t.Optional[str] = None, 973 ) -> str: 974 if not isinstance(expression.parent, exp.Select): 975 # This sample clause only applies to a single source, not the entire resulting relation 976 tablesample_keyword = "TABLESAMPLE" 977 978 if expression.args.get("size"): 979 method = expression.args.get("method") 980 if method and method.name.upper() != "RESERVOIR": 981 self.unsupported( 982 f"Sampling method {method} is not supported with a discrete sample count, " 983 "defaulting to reservoir sampling" 984 ) 985 expression.set("method", exp.var("RESERVOIR")) 986 987 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 988 989 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 990 if isinstance(expression.parent, 
exp.UserDefinedFunction): 991 return self.sql(expression, "this") 992 return super().columndef_sql(expression, sep) 993 994 def join_sql(self, expression: exp.Join) -> str: 995 if ( 996 expression.side == "LEFT" 997 and not expression.args.get("on") 998 and isinstance(expression.this, exp.Unnest) 999 ): 1000 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1001 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1002 return super().join_sql(expression.on(exp.true())) 1003 1004 return super().join_sql(expression) 1005 1006 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1007 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1008 if expression.args.get("is_end_exclusive"): 1009 return rename_func("RANGE")(self, expression) 1010 1011 return self.function_fallback_sql(expression) 1012 1013 def countif_sql(self, expression: exp.CountIf) -> str: 1014 if self.dialect.version >= Version("1.2"): 1015 return self.function_fallback_sql(expression) 1016 1017 # https://github.com/tobymao/sqlglot/pull/4749 1018 return count_if_to_sum(self, expression) 1019 1020 def bracket_sql(self, expression: exp.Bracket) -> str: 1021 if self.dialect.version >= Version("1.2"): 1022 return super().bracket_sql(expression) 1023 1024 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1025 this = expression.this 1026 if isinstance(this, exp.Array): 1027 this.replace(exp.paren(this)) 1028 1029 bracket = super().bracket_sql(expression) 1030 1031 if not expression.args.get("returns_list_for_maps"): 1032 if not this.type: 1033 from sqlglot.optimizer.annotate_types import annotate_types 1034 1035 this = annotate_types(this, dialect=self.dialect) 1036 1037 if this.is_type(exp.DataType.Type.MAP): 1038 bracket = f"({bracket})[1]" 1039 1040 return bracket 1041 1042 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1043 expression_sql = self.sql(expression, "expression") 1044 1045 func = 
expression.this 1046 if isinstance(func, exp.PERCENTILES): 1047 # Make the order key the first arg and slide the fraction to the right 1048 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1049 order_col = expression.find(exp.Ordered) 1050 if order_col: 1051 func.set("expression", func.this) 1052 func.set("this", order_col.this) 1053 1054 this = self.sql(expression, "this").rstrip(")") 1055 1056 return f"{this}{expression_sql})" 1057 1058 def length_sql(self, expression: exp.Length) -> str: 1059 arg = expression.this 1060 1061 # Dialects like BQ and Snowflake also accept binary values as args, so 1062 # DDB will attempt to infer the type or resort to case/when resolution 1063 if not expression.args.get("binary") or arg.is_string: 1064 return self.func("LENGTH", arg) 1065 1066 if not arg.type: 1067 from sqlglot.optimizer.annotate_types import annotate_types 1068 1069 arg = annotate_types(arg, dialect=self.dialect) 1070 1071 if arg.is_type(*exp.DataType.TEXT_TYPES): 1072 return self.func("LENGTH", arg) 1073 1074 # We need these casts to make duckdb's static type checker happy 1075 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1076 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1077 1078 case = ( 1079 exp.case(self.func("TYPEOF", arg)) 1080 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1081 .else_( 1082 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1083 ) # anonymous to break length_sql recursion 1084 ) 1085 1086 return self.sql(case) 1087 1088 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1089 this = expression.this 1090 key = expression.args.get("key") 1091 key_sql = key.name if isinstance(key, exp.Expression) else "" 1092 value_sql = self.sql(expression, "value") 1093 1094 kv_sql = f"{key_sql} := {value_sql}" 1095 1096 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1097 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1098 if isinstance(this, exp.Struct) and not this.expressions: 1099 return self.func("STRUCT_PACK", kv_sql) 1100 1101 return self.func("STRUCT_INSERT", this, kv_sql) 1102 1103 def unnest_sql(self, expression: exp.Unnest) -> str: 1104 explode_array = expression.args.get("explode_array") 1105 if explode_array: 1106 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1107 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1108 expression.expressions.append( 1109 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1110 ) 1111 1112 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1113 alias = expression.args.get("alias") 1114 if isinstance(alias, exp.TableAlias): 1115 expression.set("alias", None) 1116 if alias.columns: 1117 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1118 1119 unnest_sql = super().unnest_sql(expression) 1120 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1121 return self.sql(select) 1122 1123 return super().unnest_sql(expression) 1124 1125 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1126 this = expression.this 1127 1128 if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1129 # DuckDB should render IGNORE NULLS only for the general-purpose 1130 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 
1131 return super().ignorenulls_sql(expression) 1132 1133 if isinstance(this, exp.First): 1134 this = exp.AnyValue(this=this.this) 1135 1136 if not isinstance(this, exp.AnyValue): 1137 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1138 1139 return self.sql(this) 1140 1141 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1142 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1143 # DuckDB should render RESPECT NULLS only for the general-purpose 1144 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1145 return super().respectnulls_sql(expression) 1146 1147 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1148 return self.sql(expression, "this") 1149 1150 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1151 this = self.sql(expression, "this") 1152 null_text = self.sql(expression, "null") 1153 1154 if null_text: 1155 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1156 1157 return self.func("ARRAY_TO_STRING", this, expression.expression) 1158 1159 @unsupported_args("position", "occurrence") 1160 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1161 group = expression.args.get("group") 1162 params = expression.args.get("parameters") 1163 1164 # Do not render group if there is no following argument, 1165 # and it's the default value for this dialect 1166 if ( 1167 not params 1168 and group 1169 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1170 ): 1171 group = None 1172 return self.func( 1173 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1174 ) 1175 1176 @unsupported_args("culture") 1177 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1178 fmt = expression.args.get("format") 1179 if fmt and fmt.is_int: 1180 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1181 1182 self.unsupported("Only integer formats are 
supported by NumberToStr") 1183 return self.function_fallback_sql(expression) 1184 1185 def autoincrementcolumnconstraint_sql(self, _) -> str: 1186 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1187 return "" 1188 1189 def aliases_sql(self, expression: exp.Aliases) -> str: 1190 this = expression.this 1191 if isinstance(this, exp.Posexplode): 1192 return self.posexplode_sql(this) 1193 1194 return super().aliases_sql(expression) 1195 1196 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1197 this = expression.this 1198 parent = expression.parent 1199 1200 # The default Spark aliases are "pos" and "col", unless specified otherwise 1201 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1202 1203 if isinstance(parent, exp.Aliases): 1204 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1205 pos, col = parent.expressions 1206 elif isinstance(parent, exp.Table): 1207 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1208 alias = parent.args.get("alias") 1209 if alias: 1210 pos, col = alias.columns or [pos, col] 1211 alias.pop() 1212 1213 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1214 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1215 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1216 gen_subscripts = self.sql( 1217 exp.Alias( 1218 this=exp.Anonymous( 1219 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1220 ) 1221 - exp.Literal.number(1), 1222 alias=pos, 1223 ) 1224 ) 1225 1226 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1227 1228 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1229 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1230 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1231 1232 return posexplode_sql 1233 1234 def addmonths_sql(self, expression: 
exp.AddMonths) -> str: 1235 this = expression.this 1236 1237 if not this.type: 1238 from sqlglot.optimizer.annotate_types import annotate_types 1239 1240 this = annotate_types(this, dialect=self.dialect) 1241 1242 if this.is_type(*exp.DataType.TEXT_TYPES): 1243 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1244 1245 func = self.func( 1246 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1247 ) 1248 1249 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1250 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1251 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1252 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1253 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1254 return self.sql(exp.Cast(this=func, to=this.type)) 1255 1256 return self.sql(func)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Normalize *path* into a JSON path, leaving DuckDB-native syntaxes untouched.

    Besides regular JSON paths, DuckDB accepts the JSON pointer syntax
    (every path starts with a `/`) and back-of-list indexing via `[#-i]`.
    Literals written in either form are returned as-is: attempting to parse
    them as JSON paths would either log a noisy warning or produce an
    invalid representation of the path.
    """
    if isinstance(path, exp.Literal):
        text = path.name
        looks_like_duckdb_syntax = text.startswith("/") or "[#" in text
        if looks_like_duckdb_syntax:
            return path

    return super().to_json_path(path)
class Tokenizer(tokens.Tokenizer):
    """Tokenizer for DuckDB's SQL dialect.

    Extends the base tokenizer with DuckDB's string-literal syntaxes,
    keyword-to-token mappings, and operator tokens.
    """

    # e'...' / E'...' literals are tokenized as byte strings
    BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
    # Dollar-quoted strings, e.g. $tag$ ... $tag$
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    # A `$` that does not open a heredoc falls back to a parameter token
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # Operator spellings specific to DuckDB
        "//": TokenType.DIV,
        "**": TokenType.DSTAR,
        "^@": TokenType.CARET_AT,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "ATTACH": TokenType.ATTACH,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        # DuckDB's DATETIME/TIMESTAMP are timezone-naive, hence TIMESTAMPNTZ
        "DATETIME": TokenType.TIMESTAMPNTZ,
        "DETACH": TokenType.DETACH,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "RESET": TokenType.COMMAND,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP": TokenType.TIMESTAMPNTZ,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        # Unsigned integer type aliases
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    # `/*+` hint comments are not tokenized specially for DuckDB
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    # SHOW is parsed as a statement here, not treated as an opaque command
    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser for DuckDB's SQL dialect.

    Overrides the base parser's function/operator registries and adds
    DuckDB-specific constructs: LAMBDA syntax, prefix aliases (`alias: expr`),
    ATTACH/DETACH statements, SHOW, `#N` positional columns, and DuckDB's
    sampling defaults.
    """

    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

    BITWISE = {
        **parser.Parser.BITWISE,
        # `~` is DuckDB's regex-match operator, not bitwise NOT, in this position
        TokenType.TILDA: exp.RegexpLike,
    }
    # `^` is exponentiation in DuckDB (see EXPONENT below), not bitwise XOR
    BITWISE.pop(TokenType.CARET)

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
        TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
    }

    EXPONENT = {
        **parser.Parser.EXPONENT,
        # Both `^` and `**` denote power
        TokenType.CARET: exp.Pow,
        TokenType.DSTAR: exp.Pow,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    SHOW_PARSERS = {
        "TABLES": _show_parser("TABLES"),
        "ALL TABLES": _show_parser("ALL TABLES"),
    }

    # Maps DuckDB function names to builders for sqlglot's canonical expressions
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EDITDIST3": exp.Levenshtein.from_arg_list,
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "GENERATE_SERIES": _build_generate_series(),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
        "LIST_FILTER": exp.ArrayFilter.from_arg_list,
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_TRANSFORM": exp.Transform.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        # RANGE(a, b) excludes the end bound, unlike GENERATE_SERIES(a, b)
        "RANGE": _build_generate_series(end_exclusive=True),
        "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
        "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TIME_BUCKET": exp.DateBin.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
    }

    # These base-parser functions do not apply to DuckDB
    FUNCTIONS.pop("DATE_SUB")
    FUNCTIONS.pop("GLOB")

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        # All three aggregate-string spellings share the string_agg parser
        **dict.fromkeys(
            ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
        ),
    }
    # DECODE is handled via FUNCTIONS above, not via a special parser
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
        # Prefix `@x` is DuckDB's absolute-value operator
        "@": lambda self: exp.Abs(this=self._parse_bitwise()),
    }

    # SEMI/ANTI are join kinds in DuckDB, so they can't be bare table aliases
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        # `$1` (numbered) or `$name` (named) prepared-statement placeholders
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ATTACH: lambda self: self._parse_attach_detach(),
        TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    SET_PARSERS = {
        **parser.Parser.SET_PARSERS,
        "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
    }

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse DuckDB's `LAMBDA args: body` syntax, falling back to the base
        arrow-style lambda parser when the LAMBDA keyword is absent."""
        index = self._index
        if not self._match_text_seq("LAMBDA"):
            return super()._parse_lambda(alias=alias)

        expressions = self._parse_csv(self._parse_lambda_arg)
        if not self._match(TokenType.COLON):
            # Not actually a LAMBDA expression; rewind and give up
            self._retreat(index)
            return None

        this = self._replace_lambda(self._parse_assignment(), expressions)
        return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # DuckDB supports prefix aliases, e.g. foo: 1
        if self._next and self._next.token_type == TokenType.COLON:
            alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
            self._match(TokenType.COLON)
            comments = self._prev_comments or []

            this = self._parse_assignment()
            if isinstance(this, exp.Expression):
                # Moves the comment next to the alias in `alias: expr /* comment */`
                comments += this.pop_comments() or []

            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return super()._parse_expression()

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        # DuckDB supports prefix aliases, e.g. FROM foo: bar
        if self._next and self._next.token_type == TokenType.COLON:
            alias = self._parse_table_alias(
                alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
            )
            self._match(TokenType.COLON)
            comments = self._prev_comments or []
        else:
            alias = None
            comments = []

        # NOTE(review): consume_pipe is accepted but not forwarded to super() — confirm intentional
        table = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
            parse_partition=parse_partition,
        )
        if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
            # Moves the comment next to the alias in `alias: table /* comment */`
            comments += table.pop_comments() or []
            alias.comments = alias.pop_comments() + comments
            table.set("alias", alias)

        return table

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        # Fill in DuckDB's default sampling methods when none is given:
        # RESERVOIR for a fixed-size sample, SYSTEM for a percentage sample
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        """Parse bracket subscripts, tagging pre-1.2 map-index semantics."""
        bracket = super()._parse_bracket(this)

        if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        """Parse MAP syntax: `MAP {k: v}` becomes ToMap, `MAP(keys, values)` becomes Map."""
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")

    def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
        """Parse an ATTACH or DETACH [DATABASE] statement, including the
        optional IF [NOT] EXISTS clause and parenthesized attach options."""

        def _parse_attach_option() -> exp.AttachOption:
            return self.expression(
                exp.AttachOption,
                this=self._parse_var(any_token=True),
                expression=self._parse_field(any_token=True),
            )

        self._match(TokenType.DATABASE)
        exists = self._parse_exists(not_=is_attach)
        this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(_parse_attach_option)
        else:
            expressions = None

        return (
            self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
            if is_attach
            else self.expression(exp.Detach, this=this, exists=exists)
        )

    def _parse_show_duckdb(self, this: str) -> exp.Show:
        return self.expression(exp.Show, this=this)

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # `#N` refers to the N-th column positionally
        if self._match_pair(TokenType.HASH, TokenType.NUMBER):
            return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

        return super()._parse_primary()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
class Generator(generator.Generator):
    """Converts a sqlglot syntax tree into DuckDB SQL text.

    Overrides the base generator's tokens, transforms, type mappings and
    method hooks to produce DuckDB-flavored SQL (e.g. ``USING SAMPLE``,
    ``EXCLUDE``, ``STRUCT(...)`` delimiters, ``$``-style placeholders).
    """

    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    SUPPORTS_WINDOW_EXCLUDE = True
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False
    ARRAY_SIZE_DIM_REQUIRED = False
    NORMALIZE_EXTRACT_DATE_PARTS = True
    SUPPORTS_LIKE_QUANTIFIERS = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.ArrayUniqueAgg: lambda self, e: self.func(
            "LIST", exp.Distinct(expressions=[e.this])
        ),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: date_delta_to_binary_interval_op(),
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: date_delta_to_binary_interval_op(),
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: date_delta_to_binary_interval_op(),
        exp.DatetimeAdd: date_delta_to_binary_interval_op(),
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
        exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONBExists: rename_func("JSON_EXISTS"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Lateral: explode_to_unnest_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpILike: lambda self, e: self.func(
            "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
        ),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: strposition_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: date_delta_to_binary_interval_op(),
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: date_delta_to_binary_interval_op(),
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN")
        ),
        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.DateBin: rename_func("TIME_BUCKET"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.JSONB: "JSON",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
    PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
        exp.FirstValue,
        exp.Lag,
        exp.LastValue,
        exp.Lead,
        exp.NthValue,
    )

    def lambda_sql(
        self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
    ) -> str:
        if expression.args.get("colon"):
            prefix = "LAMBDA "
            arrow_sep = ":"
            wrap = False
        else:
            prefix = ""

        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
        return f"{prefix}{lambda_sql}"

    def show_sql(self, expression: exp.Show) -> str:
        return f"SHOW {expression.name}"

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def countif_sql(self, expression: exp.CountIf) -> str:
        if self.dialect.version >= Version("1.2"):
            return self.function_fallback_sql(expression)

        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if self.dialect.version >= Version("1.2"):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            .else_(
                exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)

                if alias.columns:
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        this = expression.this

        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        if isinstance(this, exp.First):
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, exp.AnyValue):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        return self.sql(this)

    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render RESPECT NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
            return super().respectnulls_sql(expression)

        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        if null_text:
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)

    @unsupported_args("position", "occurrence")
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        group = expression.args.get("group")
        params = expression.args.get("parameters")

        # Do not render group if there is no following argument,
        # and it's the default value for this dialect
        if (
            not params
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None
        return self.func(
            "REGEXP_EXTRACT", expression.this, expression.expression, group, params
        )

    @unsupported_args("culture")
    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
        fmt = expression.args.get("format")
        if fmt and fmt.is_int:
            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)

    def autoincrementcolumnconstraint_sql(self, _) -> str:
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""

    def aliases_sql(self, expression: exp.Aliases) -> str:
        this = expression.this
        if isinstance(this, exp.Posexplode):
            return self.posexplode_sql(this)

        return super().aliases_sql(expression)

    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql

    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

        func = self.func(
            "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=func, to=this.type))

        return self.sql(func)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def lambda_sql(
    self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
) -> str:
    """Render a lambda; DuckDB's colon-style lambdas get a ``LAMBDA`` prefix."""
    is_colon = bool(expression.args.get("colon"))
    if is_colon:
        # Colon-style lambdas render as `LAMBDA x: ...` without wrapping parens
        arrow_sep, wrap = ":", False

    rendered = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
    return f"LAMBDA {rendered}" if is_colon else rendered
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Render STR_TO_DATE as a (TRY_)STRPTIME call cast to DATE."""
    if expression.args.get("safe"):
        # Safe variant: TRY_STRPTIME yields NULL on unparsable input instead of raising
        parsed = self.func("TRY_STRPTIME", expression.this, self.format_time(expression))
        return f"CAST({parsed} AS DATE)"

    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    """Render TIME_FROM_PARTS as MAKE_TIME, folding nanoseconds into the seconds arg."""
    nano = expression.args.get("nano")
    if nano is not None:
        # MAKE_TIME has no nanosecond argument: add nano / 1e9 to the seconds expression
        seconds = expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        expression.set("sec", seconds)

    return rename_func("MAKE_TIME")(self, expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS as MAKE_TIMESTAMP, folding milli/nano into seconds."""
    sec = expression.args["sec"]

    # MAKE_TIMESTAMP only takes fractional seconds, so pop the sub-second
    # parts off the node and fold them into the seconds expression
    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    """Render a sample clause, forcing reservoir sampling for discrete sample counts."""
    if not isinstance(expression.parent, exp.Select):
        # A non-SELECT parent means the sample targets a single source rather
        # than the whole resulting relation, which needs the TABLESAMPLE keyword
        tablesample_keyword = "TABLESAMPLE"

    method = expression.args.get("method")
    if expression.args.get("size") and method and method.name.upper() != "RESERVOIR":
        self.unsupported(
            f"Sampling method {method} is not supported with a discrete sample count, "
            "defaulting to reservoir sampling"
        )
        expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
def join_sql(self, expression: exp.Join) -> str:
    """Render a JOIN, patching bare LEFT JOIN UNNEST with an always-true ON clause."""
    needs_dummy_on = (
        expression.side == "LEFT"
        and isinstance(expression.this, exp.Unnest)
        and not expression.args.get("on")
    )
    if needs_dummy_on:
        # Some dialects allow `LEFT JOIN UNNEST(...)` without ON; DuckDB doesn't,
        # so attach a dummy ON TRUE clause
        expression = expression.on(exp.true())

    return super().join_sql(expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render bracket access, emulating pre-1.2 DuckDB indexing semantics."""
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    # Pre-1.2 behavior: https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        # Array literals need wrapping parens before subscripting on old versions
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            # Old-version map subscripts return a single-element list; unwrap it
            bracket = f"({bracket})[1]"

    return bracket
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    """Render WITHIN GROUP, reordering percentile args for DuckDB's ordered-set form."""
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # DuckDB's ordered-set aggregates take the order key first and the
        # fraction second: https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    # Splice the ordering SQL inside the function's closing paren
    inner = self.sql(expression, "this").rstrip(")")
    return f"{inner}{expression_sql})"
def length_sql(self, expression: exp.Length) -> str:
    """Render LENGTH, dispatching binary inputs to OCTET_LENGTH via type inference."""
    arg = expression.this

    # Non-binary mode or a known string literal maps straight to LENGTH
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # Unknown type: decide at runtime with TYPEOF. The casts keep DuckDB's static
    # type checker happy; the anonymous LENGTH avoids re-entering this method.
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)
    runtime_case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
    )
    return self.sql(runtime_case)
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    """Render OBJECT_INSERT as STRUCT_INSERT, or STRUCT_PACK for an empty struct."""
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    kv_sql = f"{key_sql} := {self.sql(expression, 'value')}"

    # STRUCT_INSERT({}, k := v) is not valid DuckDB, so an empty source struct
    # (e.g. Snowflake's OBJECT_INSERT(OBJECT_CONSTRUCT(), ...)) becomes STRUCT_PACK
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Render UNNEST, handling BigQuery's array-explosion semantics.

    When the node is flagged with `explode_array` (set during BQ
    transpilation), the UNNEST is wrapped in a subquery with
    `max_depth => 2` so nested arrays/structs explode as they do in BQ.
    """
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to
        # "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if isinstance(alias, exp.TableAlias):
            # Detach the alias before rendering the inner UNNEST, then reuse
            # its first column name as the subquery's table alias.
            expression.set("alias", None)
            if alias.columns:
                alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

    return super().unnest_sql(expression)
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    """Render IGNORE NULLS only where DuckDB accepts it.

    General-purpose window functions keep the modifier; FIRST is rewritten
    to ANY_VALUE (which ignores NULLs in DuckDB); anything else drops the
    modifier with an unsupported warning.
    """
    inner = expression.this

    # DuckDB should render IGNORE NULLS only for the general-purpose
    # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
    if isinstance(inner, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        return super().ignorenulls_sql(expression)

    if isinstance(inner, exp.First):
        inner = exp.AnyValue(this=inner.this)

    if not isinstance(inner, exp.AnyValue):
        self.unsupported("IGNORE NULLS is not supported for non-window functions.")

    return self.sql(inner)
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    """Render RESPECT NULLS only where DuckDB accepts it.

    DuckDB supports the modifier solely on the general-purpose window
    functions e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...); elsewhere
    the modifier is dropped with an unsupported warning.
    """
    inner = expression.this

    if isinstance(inner, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    """Render ARRAY_TO_STRING, emulating the NULL-replacement argument.

    DuckDB's ARRAY_TO_STRING has no NULL-replacement parameter, so when one
    is present the array is first passed through LIST_TRANSFORM + COALESCE.
    """
    array_sql = self.sql(expression, "this")
    null_replacement = self.sql(expression, "null")

    if null_replacement:
        array_sql = f"LIST_TRANSFORM({array_sql}, x -> COALESCE(x, {null_replacement}))"

    return self.func("ARRAY_TO_STRING", array_sql, expression.expression)
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    """Render REGEXP_EXTRACT, omitting a redundant default group argument."""
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    default_group = str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    if group and not params and group.name == default_group:
        group = None

    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    """Render TO_CHAR-style number formatting via DuckDB's FORMAT.

    Only integer precision formats can be mapped; anything else falls back
    to the generic function rendering with an unsupported warning.
    """
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        # e.g. precision 2 -> FORMAT('{:,.2f}', x)
        format_spec = f"'{{:,.{fmt.name}f}}'"
        return self.func("FORMAT", format_spec, expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
def posexplode_sql(self, expression: exp.Posexplode) -> str:
    """Render Spark's POSEXPLODE as GENERATE_SUBSCRIPTS + UNNEST in DuckDB.

    The position/value alias names are taken from the surrounding Aliases
    or Table node when present, otherwise they default to Spark's "pos"
    and "col". In the FROM-clause case the pair is wrapped in a subquery.
    """
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            # The table alias is consumed here, so remove it from the parent
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
def addmonths_sql(self, expression: exp.AddMonths) -> str:
    """Render ADD_MONTHS as DuckDB DATE_ADD, preserving the input's type.

    Text inputs are cast to TIMESTAMP first. Because DuckDB's DATE_ADD
    always yields TIMESTAMP/DATETIME, the result is cast back to the
    original type for DATE and TIMESTAMPTZ inputs — matching e.g.
    Snowflake's ADD_MONTHS, which preserves the input type
    (ADD_MONTHS('2023-01-31'::date, 1) returns DATE, not TIMESTAMP).
    """
    target = expression.this

    if not target.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        target = annotate_types(target, dialect=self.dialect)

    if target.is_type(*exp.DataType.TEXT_TYPES):
        target = exp.Cast(this=target, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

    month_interval = exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
    date_add = self.func("DATE_ADD", target, month_interval)

    if target.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
        return self.sql(exp.Cast(this=date_add, to=target.type))

    return self.sql(date_add)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql