sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)
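
# Illustrative usage sketch (not from this module; assumes a recent sqlglot,
# outputs shown approximately): _date_delta_sql above renders date arithmetic
# as `operand +/- INTERVAL`, e.g. when reading BigQuery's DATE_ADD:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
#     ...     read="bigquery",
#     ...     write="duckdb",
#     ... )
#     ['SELECT CURRENT_DATE + INTERVAL 1 DAY']
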
def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )
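
# Illustrative usage sketch (approximate outputs): the builder above defaults a
# missing series start to 0 and tags RANGE as end-exclusive, so the generator
# can round-trip RANGE instead of collapsing it into GENERATE_SERIES:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT GENERATE_SERIES(5)", read="duckdb", write="duckdb")
#     ['SELECT GENERATE_SERIES(0, 5)']
#     >>> sqlglot.transpile("SELECT RANGE(0, 5)", read="duckdb", write="duckdb")
#     ['SELECT RANGE(0, 5)']
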
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
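
# Illustrative usage sketch (approximate output): per the comment in
# _struct_sql, a keyless BigQuery inline STRUCT under a cast becomes a ROW
# constructor, while keyed structs use DuckDB's {'k': v} syntax:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT STRUCT<a STRING>('str')", read="bigquery", write="duckdb")
#     ["SELECT CAST(ROW('str') AS STRUCT(a TEXT))"]
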
def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
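
# Illustrative usage sketch (output paraphrased, not exact): a BigQuery
# GENERATE_DATE_ARRAY call would come out roughly as a GENERATE_SERIES over
# DATE-cast bounds, wrapped in a cast back to a DATE array:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT GENERATE_DATE_ARRAY('2024-01-01', '2024-01-07')",
#     ...     read="bigquery",
#     ...     write="duckdb",
#     ... )  # -> roughly: CAST(GENERATE_SERIES(CAST('2024-01-01' AS DATE), ...) AS DATE[])
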
class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }
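
    # Illustrative usage sketch (approximate output): to_json_path above leaves
    # JSON-pointer-style literals untouched rather than parsing them as JSON
    # paths, so they survive a round trip unchanged:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT col -> '/a/b' FROM t", read="duckdb").sql("duckdb")
    #     "SELECT col -> '/a/b' FROM t"
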
    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
        }

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()
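
        # Illustrative usage sketch (approximate output): prefix aliases parse
        # into ordinary Alias nodes, so they re-serialize with AS:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT foo: 1", read="duckdb", write="duckdb")
        #     ['SELECT 1 AS foo']
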
        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )
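
    # Illustrative usage sketch (output paraphrased, not exact): per
    # _parse_table_sample above, a row-count sample with no explicit method is
    # parsed with DuckDB's documented default, RESERVOIR, so the regenerated
    # SQL names the method explicitly:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile(
    ...     #     "SELECT * FROM t USING SAMPLE 10 ROWS", read="duckdb", write="duckdb"
    ...     # )  # -> roughly: SELECT * FROM t USING SAMPLE RESERVOIR (10 ROWS)
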
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
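
        # Illustrative usage sketch (approximate output): STAR_EXCEPT = "EXCLUDE"
        # above makes BigQuery's SELECT * EXCEPT render with DuckDB's keyword:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")
        #     ['SELECT * EXCLUDE (a) FROM t']
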
        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)
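
        # Illustrative usage sketch (output paraphrased, not exact): interval_sql
        # above rewrites WEEK/QUARTER intervals as day multiples, so a one-week
        # interval is generated roughly as (7 * INTERVAL 1 DAY):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT d + INTERVAL 1 WEEK", read="duckdb", write="duckdb")
        #     ['SELECT d + (7 * INTERVAL 1 DAY)']
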
        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)
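
        # Illustrative usage sketch (approximate output): countif_sql above is
        # version-gated. With the default (recent) DuckDB target it falls back
        # to the plain function, while a dialect configured as an older DuckDB
        # (version < 1.2) rewrites it via count_if_to_sum into SUM(CASE ...):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT COUNTIF(x > 0) FROM t", read="bigquery", write="duckdb")
        #     ['SELECT COUNT_IF(x > 0) FROM t']
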
        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it, since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
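
# Illustrative usage sketch (approximate output): per the comment in
# objectinsert_sql, a Snowflake OBJECT_INSERT over an empty object goes
# through STRUCT_PACK, since STRUCT_INSERT({}, ...) is not valid DuckDB:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
#     ...     read="snowflake",
#     ...     write="duckdb",
#     ... )
#     ['SELECT STRUCT_PACK(k := 1)']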
return super().columndef_sql(expression, sep) 949 950 def join_sql(self, expression: exp.Join) -> str: 951 if ( 952 expression.side == "LEFT" 953 and not expression.args.get("on") 954 and isinstance(expression.this, exp.Unnest) 955 ): 956 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 957 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 958 return super().join_sql(expression.on(exp.true())) 959 960 return super().join_sql(expression) 961 962 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 963 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 964 if expression.args.get("is_end_exclusive"): 965 return rename_func("RANGE")(self, expression) 966 967 return self.function_fallback_sql(expression) 968 969 def countif_sql(self, expression: exp.CountIf) -> str: 970 if self.dialect.version >= Version("1.2"): 971 return self.function_fallback_sql(expression) 972 973 # https://github.com/tobymao/sqlglot/pull/4749 974 return count_if_to_sum(self, expression) 975 976 def bracket_sql(self, expression: exp.Bracket) -> str: 977 if self.dialect.version >= Version("1.2"): 978 return super().bracket_sql(expression) 979 980 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 981 this = expression.this 982 if isinstance(this, exp.Array): 983 this.replace(exp.paren(this)) 984 985 bracket = super().bracket_sql(expression) 986 987 if not expression.args.get("returns_list_for_maps"): 988 if not this.type: 989 from sqlglot.optimizer.annotate_types import annotate_types 990 991 this = annotate_types(this) 992 993 if this.is_type(exp.DataType.Type.MAP): 994 bracket = f"({bracket})[1]" 995 996 return bracket 997 998 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 999 expression_sql = self.sql(expression, "expression") 1000 1001 func = expression.this 1002 if isinstance(func, exp.PERCENTILES): 1003 # Make the order key the first arg and slide the fraction to the right 1004 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1005 order_col = expression.find(exp.Ordered) 1006 if order_col: 1007 func.set("expression", func.this) 1008 func.set("this", order_col.this) 1009 1010 this = self.sql(expression, "this").rstrip(")") 1011 1012 return f"{this}{expression_sql})" 1013 1014 def length_sql(self, expression: exp.Length) -> str: 1015 arg = expression.this 1016 1017 # Dialects like BQ and Snowflake also accept binary values as args, so 1018 # DDB will attempt to infer the type or resort to case/when resolution 1019 if not expression.args.get("binary") or arg.is_string: 1020 return self.func("LENGTH", arg) 1021 1022 if not arg.type: 1023 from sqlglot.optimizer.annotate_types import annotate_types 1024 1025 arg = annotate_types(arg) 1026 1027 if arg.is_type(*exp.DataType.TEXT_TYPES): 1028 return self.func("LENGTH", arg) 1029 1030 # We need these casts to make duckdb's static type checker happy 1031 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1032 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1033 1034 case = ( 1035 exp.case(self.func("TYPEOF", arg)) 1036 .when( 1037 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1038 ) # anonymous to break length_sql recursion 1039 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1040 ) 1041 1042 return self.sql(case) 1043 1044 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1045 this = expression.this 1046 key = expression.args.get("key") 1047 key_sql = key.name if isinstance(key, exp.Expression) else "" 
1048 value_sql = self.sql(expression, "value") 1049 1050 kv_sql = f"{key_sql} := {value_sql}" 1051 1052 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1053 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1054 if isinstance(this, exp.Struct) and not this.expressions: 1055 return self.func("STRUCT_PACK", kv_sql) 1056 1057 return self.func("STRUCT_INSERT", this, kv_sql) 1058 1059 def unnest_sql(self, expression: exp.Unnest) -> str: 1060 explode_array = expression.args.get("explode_array") 1061 if explode_array: 1062 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1063 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1064 expression.expressions.append( 1065 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1066 ) 1067 1068 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1069 alias = expression.args.get("alias") 1070 if alias: 1071 expression.set("alias", None) 1072 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1073 1074 unnest_sql = super().unnest_sql(expression) 1075 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1076 return self.sql(select) 1077 1078 return super().unnest_sql(expression) 1079 1080 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1081 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1082 # DuckDB should render IGNORE NULLS only for the general-purpose 1083 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1084 return super().ignorenulls_sql(expression) 1085 1086 return self.sql(expression, "this") 1087 1088 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1089 this = self.sql(expression, "this") 1090 null_text = self.sql(expression, "null") 1091 1092 if null_text: 1093 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1094 1095 return self.func("ARRAY_TO_STRING", this, expression.expression) 1096 1097 @unsupported_args("position", "occurrence") 1098 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1099 group = expression.args.get("group") 1100 params = expression.args.get("parameters") 1101 1102 # Do not render group if there is no following argument, 1103 # and it's the default value for this dialect 1104 if ( 1105 not params 1106 and group 1107 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1108 ): 1109 group = None 1110 return self.func( 1111 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1112 ) 1113 1114 @unsupported_args("culture") 1115 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1116 fmt = expression.args.get("format") 1117 if fmt and fmt.is_int: 1118 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1119 1120 self.unsupported("Only integer formats are supported by NumberToStr") 1121 return self.function_fallback_sql(expression) 1122 1123 def autoincrementcolumnconstraint_sql(self, _) -> str: 1124 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1125 return ""
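The TRANSFORMS and TYPE_MAPPING tables above drive everyday transpilation into DuckDB. Below is a minimal sketch of their effect; the outputs in the comments are what these mappings imply, and exact spacing may vary between sqlglot versions (the table/column names t, a, b, x are placeholders):

import sqlglot

# STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT becomes DuckDB's EXCLUDE
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])
# expected: SELECT * EXCLUDE (a) FROM t

# exp.BitwiseXor is rendered through rename_func("XOR")
print(sqlglot.transpile("SELECT a ^ b FROM t", read="mysql", write="duckdb")[0])
# expected: SELECT XOR(a, b) FROM t

# TYPE_MAPPING: BINARY maps to BLOB
print(sqlglot.transpile("SELECT CAST(x AS BINARY)", read="mysql", write="duckdb")[0])
# expected: SELECT CAST(x AS BLOB)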
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this is interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
Specifies the strategy according to which identifiers should be normalized.
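These dialect-level flags are easiest to see through round trips. A small sketch, assuming the DuckDB dialect enables ORDER BY ALL support and that the source dialect (T-SQL here) is one where CONCAT treats NULL as an empty string; expected results are in the comments:

import sqlglot

# ORDER BY ALL parses and round-trips under the duckdb dialect
print(sqlglot.parse_one("SELECT a, b FROM t ORDER BY ALL", read="duckdb").sql(dialect="duckdb"))
# expected: SELECT a, b FROM t ORDER BY ALL

# Reading from a NULL-swallowing CONCAT dialect should add COALESCE wrappers
# so DuckDB's NULL-propagating CONCAT behaves the same way
print(sqlglot.transpile("SELECT CONCAT(a, b)", read="tsql", write="duckdb")[0])
# expected: SELECT CONCAT(COALESCE(a, ''), COALESCE(b, ''))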
294 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 295 if isinstance(path, exp.Literal): 296 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 297 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 298 # This check ensures we'll avoid trying to parse these as JSON paths, which can 299 # either result in a noisy warning or in an invalid representation of the path. 300 path_text = path.name 301 if path_text.startswith("/") or "[#" in path_text: 302 return path 303 304 return super().to_json_path(path)
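In practice this means DuckDB's JSON pointer paths survive a round trip untouched, while '$'-style paths still go through the JSONPath machinery. A hedged sketch (x and t are placeholder names):

import sqlglot

# A leading '/' marks a JSON pointer, which to_json_path leaves as a plain literal
print(sqlglot.transpile("SELECT x -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])

# A '$'-style path is parsed as a JSONPath expression instead
print(sqlglot.transpile("SELECT x -> '$.a[0]' FROM t", read="duckdb", write="duckdb")[0])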
306 class Tokenizer(tokens.Tokenizer): 307 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 308 HEREDOC_STRINGS = ["$"] 309 310 HEREDOC_TAG_IS_IDENTIFIER = True 311 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 312 313 KEYWORDS = { 314 **tokens.Tokenizer.KEYWORDS, 315 "//": TokenType.DIV, 316 "**": TokenType.DSTAR, 317 "^@": TokenType.CARET_AT, 318 "@>": TokenType.AT_GT, 319 "<@": TokenType.LT_AT, 320 "ATTACH": TokenType.ATTACH, 321 "BINARY": TokenType.VARBINARY, 322 "BITSTRING": TokenType.BIT, 323 "BPCHAR": TokenType.TEXT, 324 "CHAR": TokenType.TEXT, 325 "CHARACTER VARYING": TokenType.TEXT, 326 "DETACH": TokenType.DETACH, 327 "EXCLUDE": TokenType.EXCEPT, 328 "LOGICAL": TokenType.BOOLEAN, 329 "ONLY": TokenType.ONLY, 330 "PIVOT_WIDER": TokenType.PIVOT, 331 "POSITIONAL": TokenType.POSITIONAL, 332 "SIGNED": TokenType.INT, 333 "STRING": TokenType.TEXT, 334 "SUMMARIZE": TokenType.SUMMARIZE, 335 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 336 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 337 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 338 "TIMESTAMP_US": TokenType.TIMESTAMP, 339 "UBIGINT": TokenType.UBIGINT, 340 "UINTEGER": TokenType.UINT, 341 "USMALLINT": TokenType.USMALLINT, 342 "UTINYINT": TokenType.UTINYINT, 343 "VARCHAR": TokenType.TEXT, 344 } 345 KEYWORDS.pop("/*+") 346 347 SINGLE_TOKENS = { 348 **tokens.Tokenizer.SINGLE_TOKENS, 349 "$": TokenType.PARAMETER, 350 }
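A couple of the tokenizer quirks above in action: STRING is tokenized as TEXT, and // is DuckDB's integer division operator rather than the start of a comment (note that "/*+" hint comments are also dropped from KEYWORDS). A minimal sketch:

import sqlglot

# STRING tokenizes as TEXT; '//' parses as integer division
print(sqlglot.transpile("SELECT CAST(x AS STRING), 7 // 2", read="duckdb", write="duckdb")[0])
# expected: SELECT CAST(x AS TEXT), 7 // 2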
352 class Parser(parser.Parser): 353 BITWISE = { 354 **parser.Parser.BITWISE, 355 TokenType.TILDA: exp.RegexpLike, 356 } 357 BITWISE.pop(TokenType.CARET) 358 359 RANGE_PARSERS = { 360 **parser.Parser.RANGE_PARSERS, 361 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 362 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 363 } 364 365 EXPONENT = { 366 **parser.Parser.EXPONENT, 367 TokenType.CARET: exp.Pow, 368 TokenType.DSTAR: exp.Pow, 369 } 370 371 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 372 373 FUNCTIONS = { 374 **parser.Parser.FUNCTIONS, 375 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 376 "ARRAY_SORT": exp.SortArray.from_arg_list, 377 "DATEDIFF": _build_date_diff, 378 "DATE_DIFF": _build_date_diff, 379 "DATE_TRUNC": date_trunc_to_time, 380 "DATETRUNC": date_trunc_to_time, 381 "DECODE": lambda args: exp.Decode( 382 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 383 ), 384 "EDITDIST3": exp.Levenshtein.from_arg_list, 385 "ENCODE": lambda args: exp.Encode( 386 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 387 ), 388 "EPOCH": exp.TimeToUnix.from_arg_list, 389 "EPOCH_MS": lambda args: exp.UnixToTime( 390 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 391 ), 392 "GENERATE_SERIES": _build_generate_series(), 393 "JSON": exp.ParseJSON.from_arg_list, 394 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 395 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 396 "LIST_HAS": exp.ArrayContains.from_arg_list, 397 "LIST_REVERSE_SORT": _build_sort_array_desc, 398 "LIST_SORT": exp.SortArray.from_arg_list, 399 "LIST_VALUE": lambda args: exp.Array(expressions=args), 400 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 401 "MAKE_TIMESTAMP": _build_make_timestamp, 402 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 403 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 404 "RANGE": _build_generate_series(end_exclusive=True), 405 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 406 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 407 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 408 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 409 this=seq_get(args, 0), 410 expression=seq_get(args, 1), 411 replacement=seq_get(args, 2), 412 modifiers=seq_get(args, 3), 413 ), 414 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 415 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 416 "STRING_SPLIT": exp.Split.from_arg_list, 417 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 418 "STRING_TO_ARRAY": exp.Split.from_arg_list, 419 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 420 "STRUCT_PACK": exp.Struct.from_arg_list, 421 "STR_SPLIT": exp.Split.from_arg_list, 422 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 423 "TIME_BUCKET": exp.DateBin.from_arg_list, 424 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 425 "UNNEST": exp.Explode.from_arg_list, 426 "XOR": binary_from_function(exp.BitwiseXor), 427 } 428 429 FUNCTIONS.pop("DATE_SUB") 430 FUNCTIONS.pop("GLOB") 431 432 FUNCTION_PARSERS = { 433 **parser.Parser.FUNCTION_PARSERS, 434 **dict.fromkeys( 435 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 436 ), 437 } 438 FUNCTION_PARSERS.pop("DECODE") 439 440 NO_PAREN_FUNCTION_PARSERS = { 441 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 442 "MAP": lambda self: self._parse_map(), 443 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 444 } 445 446 
TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 447 TokenType.SEMI, 448 TokenType.ANTI, 449 } 450 451 PLACEHOLDER_PARSERS = { 452 **parser.Parser.PLACEHOLDER_PARSERS, 453 TokenType.PARAMETER: lambda self: ( 454 self.expression(exp.Placeholder, this=self._prev.text) 455 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 456 else None 457 ), 458 } 459 460 TYPE_CONVERTERS = { 461 # https://duckdb.org/docs/sql/data_types/numeric 462 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 463 # https://duckdb.org/docs/sql/data_types/text 464 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 465 } 466 467 STATEMENT_PARSERS = { 468 **parser.Parser.STATEMENT_PARSERS, 469 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 470 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 471 } 472 473 def _parse_expression(self) -> t.Optional[exp.Expression]: 474 # DuckDB supports prefix aliases, e.g. foo: 1 475 if self._next and self._next.token_type == TokenType.COLON: 476 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 477 self._match(TokenType.COLON) 478 comments = self._prev_comments or [] 479 480 this = self._parse_assignment() 481 if isinstance(this, exp.Expression): 482 # Moves the comment next to the alias in `alias: expr /* comment */` 483 comments += this.pop_comments() or [] 484 485 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 486 487 return super()._parse_expression() 488 489 def _parse_table( 490 self, 491 schema: bool = False, 492 joins: bool = False, 493 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 494 parse_bracket: bool = False, 495 is_db_reference: bool = False, 496 parse_partition: bool = False, 497 ) -> t.Optional[exp.Expression]: 498 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 499 if self._next and self._next.token_type == TokenType.COLON: 500 alias = self._parse_table_alias( 501 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 502 ) 503 self._match(TokenType.COLON) 504 comments = self._prev_comments or [] 505 else: 506 alias = None 507 comments = [] 508 509 table = super()._parse_table( 510 schema=schema, 511 joins=joins, 512 alias_tokens=alias_tokens, 513 parse_bracket=parse_bracket, 514 is_db_reference=is_db_reference, 515 parse_partition=parse_partition, 516 ) 517 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 518 # Moves the comment next to the alias in `alias: table /* comment */` 519 comments += table.pop_comments() or [] 520 alias.comments = alias.pop_comments() + comments 521 table.set("alias", alias) 522 523 return table 524 525 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 526 # https://duckdb.org/docs/sql/samples.html 527 sample = super()._parse_table_sample(as_modifier=as_modifier) 528 if sample and not sample.args.get("method"): 529 if sample.args.get("size"): 530 sample.set("method", exp.var("RESERVOIR")) 531 else: 532 sample.set("method", exp.var("SYSTEM")) 533 534 return sample 535 536 def _parse_bracket( 537 self, this: t.Optional[exp.Expression] = None 538 ) -> t.Optional[exp.Expression]: 539 bracket = super()._parse_bracket(this) 540 541 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 542 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 543 bracket.set("returns_list_for_maps", True) 544 545 return bracket 546 547 def _parse_map(self) -> exp.ToMap | exp.Map: 548 if self._match(TokenType.L_BRACE, advance=False): 549 return self.expression(exp.ToMap, this=self._parse_bracket()) 550 551 args = self._parse_wrapped_csv(self._parse_assignment) 552 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 553 554 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 555 return self._parse_field_def() 556 557 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 558 if len(aggregations) == 1: 559 return super()._pivot_column_names(aggregations) 560 return pivot_column_names(aggregations, dialect="duckdb") 561 562 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 563 def _parse_attach_option() -> exp.AttachOption: 564 return self.expression( 565 exp.AttachOption, 566 this=self._parse_var(any_token=True), 567 expression=self._parse_field(any_token=True), 568 ) 569 570 self._match(TokenType.DATABASE) 571 exists = self._parse_exists(not_=is_attach) 572 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 573 574 if self._match(TokenType.L_PAREN, advance=False): 575 expressions = self._parse_wrapped_csv(_parse_attach_option) 576 else: 577 expressions = None 578 579 return ( 580 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 581 if is_attach 582 else self.expression(exp.Detach, this=this, exists=exists) 583 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
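Two of the parser extensions above, shown end to end. This is a sketch; the expected results are in the comments and may vary slightly between sqlglot versions ('file.db' and db are placeholder names):

import sqlglot

# Prefix aliases (SELECT foo: 1) parse into ordinary Alias nodes
print(sqlglot.transpile("SELECT foo: 1", read="duckdb", write="duckdb")[0])
# expected: SELECT 1 AS foo

# ATTACH/DETACH statements parse into dedicated Attach/Detach expressions
stmt = sqlglot.parse_one("ATTACH DATABASE 'file.db' AS db", read="duckdb")
print(type(stmt).__name__)
# expected: Attach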
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
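These options are forwarded to the generator, e.g. through sqlglot.transpile. A brief sketch using pretty and indent (the query is a placeholder):

import sqlglot

sql = "SELECT a, b FROM t WHERE x IN (SELECT y FROM u)"
# pretty-print with 4-space indentation; both options land on DuckDB's Generator
print(sqlglot.transpile(sql, write="duckdb", pretty=True, indent=4)[0])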
874 def strtodate_sql(self, expression: exp.StrToDate) -> str: 875 if expression.args.get("safe"): 876 formatted_time = self.format_time(expression) 877 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 878 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
886 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 887 nano = expression.args.get("nano") 888 if nano is not None: 889 expression.set( 890 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 891 ) 892 893 return rename_func("MAKE_TIME")(self, expression)
895 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 896 sec = expression.args["sec"] 897 898 milli = expression.args.get("milli") 899 if milli is not None: 900 sec += milli.pop() / exp.Literal.number(1000.0) 901 902 nano = expression.args.get("nano") 903 if nano is not None: 904 sec += nano.pop() / exp.Literal.number(1000000000.0) 905 906 if milli or nano: 907 expression.set("sec", sec) 908 909 return rename_func("MAKE_TIMESTAMP")(self, expression)
911 def tablesample_sql( 912 self, 913 expression: exp.TableSample, 914 tablesample_keyword: t.Optional[str] = None, 915 ) -> str: 916 if not isinstance(expression.parent, exp.Select): 917 # This sample clause only applies to a single source, not the entire resulting relation 918 tablesample_keyword = "TABLESAMPLE" 919 920 if expression.args.get("size"): 921 method = expression.args.get("method") 922 if method and method.name.upper() != "RESERVOIR": 923 self.unsupported( 924 f"Sampling method {method} is not supported with a discrete sample count, " 925 "defaulting to reservoir sampling" 926 ) 927 expression.set("method", exp.var("RESERVOIR")) 928 929 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
931 def interval_sql(self, expression: exp.Interval) -> str: 932 multiplier: t.Optional[int] = None 933 unit = expression.text("unit").lower() 934 935 if unit.startswith("week"): 936 multiplier = 7 937 if unit.startswith("quarter"): 938 multiplier = 90 939 940 if multiplier: 941 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 942 943 return super().interval_sql(expression)
950 def join_sql(self, expression: exp.Join) -> str: 951 if ( 952 expression.side == "LEFT" 953 and not expression.args.get("on") 954 and isinstance(expression.this, exp.Unnest) 955 ): 956 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 957 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 958 return super().join_sql(expression.on(exp.true())) 959 960 return super().join_sql(expression)
976 def bracket_sql(self, expression: exp.Bracket) -> str: 977 if self.dialect.version >= Version("1.2"): 978 return super().bracket_sql(expression) 979 980 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 981 this = expression.this 982 if isinstance(this, exp.Array): 983 this.replace(exp.paren(this)) 984 985 bracket = super().bracket_sql(expression) 986 987 if not expression.args.get("returns_list_for_maps"): 988 if not this.type: 989 from sqlglot.optimizer.annotate_types import annotate_types 990 991 this = annotate_types(this) 992 993 if this.is_type(exp.DataType.Type.MAP): 994 bracket = f"({bracket})[1]" 995 996 return bracket
998 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 999 expression_sql = self.sql(expression, "expression") 1000 1001 func = expression.this 1002 if isinstance(func, exp.PERCENTILES): 1003 # Make the order key the first arg and slide the fraction to the right 1004 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1005 order_col = expression.find(exp.Ordered) 1006 if order_col: 1007 func.set("expression", func.this) 1008 func.set("this", order_col.this) 1009 1010 this = self.sql(expression, "this").rstrip(")") 1011 1012 return f"{this}{expression_sql})"
1014 def length_sql(self, expression: exp.Length) -> str: 1015 arg = expression.this 1016 1017 # Dialects like BQ and Snowflake also accept binary values as args, so 1018 # DDB will attempt to infer the type or resort to case/when resolution 1019 if not expression.args.get("binary") or arg.is_string: 1020 return self.func("LENGTH", arg) 1021 1022 if not arg.type: 1023 from sqlglot.optimizer.annotate_types import annotate_types 1024 1025 arg = annotate_types(arg) 1026 1027 if arg.is_type(*exp.DataType.TEXT_TYPES): 1028 return self.func("LENGTH", arg) 1029 1030 # We need these casts to make duckdb's static type checker happy 1031 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1032 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1033 1034 case = ( 1035 exp.case(self.func("TYPEOF", arg)) 1036 .when( 1037 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1038 ) # anonymous to break length_sql recursion 1039 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1040 ) 1041 1042 return self.sql(case)
1044 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1045 this = expression.this 1046 key = expression.args.get("key") 1047 key_sql = key.name if isinstance(key, exp.Expression) else "" 1048 value_sql = self.sql(expression, "value") 1049 1050 kv_sql = f"{key_sql} := {value_sql}" 1051 1052 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1053 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1054 if isinstance(this, exp.Struct) and not this.expressions: 1055 return self.func("STRUCT_PACK", kv_sql) 1056 1057 return self.func("STRUCT_INSERT", this, kv_sql)
1059 def unnest_sql(self, expression: exp.Unnest) -> str: 1060 explode_array = expression.args.get("explode_array") 1061 if explode_array: 1062 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1063 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1064 expression.expressions.append( 1065 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1066 ) 1067 1068 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1069 alias = expression.args.get("alias") 1070 if alias: 1071 expression.set("alias", None) 1072 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1073 1074 unnest_sql = super().unnest_sql(expression) 1075 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1076 return self.sql(select) 1077 1078 return super().unnest_sql(expression)
1080 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1081 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1082 # DuckDB should render IGNORE NULLS only for the general-purpose 1083 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1084 return super().ignorenulls_sql(expression) 1085 1086 return self.sql(expression, "this")
1088 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1089 this = self.sql(expression, "this") 1090 null_text = self.sql(expression, "null") 1091 1092 if null_text: 1093 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1094 1095 return self.func("ARRAY_TO_STRING", this, expression.expression)
1097 @unsupported_args("position", "occurrence") 1098 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1099 group = expression.args.get("group") 1100 params = expression.args.get("parameters") 1101 1102 # Do not render group if there is no following argument, 1103 # and it's the default value for this dialect 1104 if ( 1105 not params 1106 and group 1107 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1108 ): 1109 group = None 1110 return self.func( 1111 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1112 )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
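As an illustration, MySQL's FORMAT(n, d) parses to NumberToStr with an integer format, which maps onto DuckDB's fmt-style FORMAT (indicative output):

import sqlglot

# Expected, indicatively: SELECT FORMAT('{:,.2f}', 12345.678)
print(
    sqlglot.transpile("SELECT FORMAT(12345.678, 2)", read="mysql", write="duckdb")[0]
)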
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql