# sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

# Union of all date/time delta expressions rendered via `+/- INTERVAL` arithmetic.
DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

# The general-purpose window functions for which DuckDB accepts IGNORE NULLS.
WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    """Render a date/time add or subtract as `<operand> +/- INTERVAL <expr> <unit>`."""
    operand = expression.this
    unit = unit_to_var(expression)

    is_addition = isinstance(
        expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd)
    )
    op = "+" if is_addition else "-"

    cast_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        cast_type = expression.return_type
    elif operand.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type
        # so the +/- interval arithmetic is valid
        if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub)):
            cast_type = exp.DataType.Type.DATETIME
        else:
            cast_type = exp.DataType.Type.DATE

    if cast_type:
        operand = exp.cast(operand, cast_type)

    interval = exp.Interval(this=expression.expression, unit=unit)
    return f"{self.sql(operand)} {op} {self.sql(interval)}"


def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    """BigQuery -> DuckDB conversion for the DATE function."""
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if not zone:
        return result

    date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
    date_str = f"{date_str} || ' ' || {zone}"

    # STRPTIME with %Z yields a TIMESTAMP that carries the time zone information
    return self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")


def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    """BigQuery -> DuckDB conversion for the TIME_DIFF function."""
    start = exp.cast(expression.this, exp.DataType.Type.TIME)
    end = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), end, start)


@generator.unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    """Render ArraySort as ARRAY_SORT; a comparator argument is flagged as unsupported."""
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    """Render SortArray, picking the reverse-sorting function for descending order."""
    if expression.args.get("asc") == exp.false():
        func_name = "ARRAY_REVERSE_SORT"
    else:
        func_name = "ARRAY_SORT"
    return self.func(func_name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    """Build a descending SortArray from [ARRAY|LIST]_REVERSE_SORT arguments."""
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    """DuckDB's DATE_DIFF(unit, start, end) maps to DateDiff(this=end, expression=start)."""
    unit, start, end = seq_get(args, 0), seq_get(args, 1), seq_get(args, 2)
    return exp.DateDiff(this=end, expression=start, unit=unit)


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    """Return a builder for GENERATE_SERIES / RANGE, tagging end-bound exclusivity."""

    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        series = exp.GenerateSeries.from_arg_list(args)
        series.set("is_end_exclusive", end_exclusive)
        return series

    return _builder
def _build_make_timestamp(args: t.List) -> exp.Expression:
    """MAKE_TIMESTAMP: one argument is epoch micros, otherwise discrete date parts."""
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    part_names = ("year", "month", "day", "hour", "min", "sec")
    return exp.TimestampFromParts(
        **{name: seq_get(args, i) for i, name in enumerate(part_names)}
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    """Render a Struct as a `{...}` literal, or as ROW(...) under a STRUCT cast.

    BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which
    is canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB.
    The transformation to ROW happens when a cast to STRUCT / ARRAY of STRUCTs is found.
    """
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = bool(ancestor_cast) and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    parts: t.List[str] = []
    for i, expr in enumerate(expression.expressions):
        if isinstance(expr, exp.PropertyEQ):
            key, value = expr.name, expr.expression
        else:
            # Positional entries get synthetic `_<index>` field names
            key, value = f"_{i}", expr

        if is_struct_cast:
            parts.append(self.sql(value))
        else:
            parts.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(parts)
    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    """Render a DataType, handling fixed-size arrays and zone-aware time types."""
    if expression.is_type("array"):
        inner = self.expressions(expression, flat=True)
        size = self.expressions(expression, key="values", flat=True)
        return f"{inner}[{size}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    """Render JSONFormat as TO_JSON cast back to TEXT."""
    json_sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({json_sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    """Pick the DuckDB conversion function matching the epoch value's scale."""
    scale = expression.args.get("scale")
    value = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", value)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", value)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", value)

    # Arbitrary scale: divide down to seconds before converting
    return self.func("TO_TIMESTAMP", exp.Div(this=value, expression=exp.func("POW", 10, scale)))


# Parent expressions under which arrow-style JSON extraction must be parenthesized.
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    """Render arrow-style JSON extraction, wrapping it when precedence requires."""
    sql = arrow_json_extract_sql(self, expression)
    needs_wrap = not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    )
    return self.wrap(sql) if needs_wrap else sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    """Cast literal arguments to `type`; non-literals pass through unchanged."""
    if isinstance(arg, exp.Literal):
        return exp.cast(arg, type)
    return arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    """Render DateDiff as DATE_DIFF(unit, start, end), casting literal operands."""
    end = _implicit_datetime_cast(expression.this)
    start = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), start, end)
def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    """Transpile BQ's GENERATE_DATE_ARRAY / GENERATE_TIMESTAMP_ARRAY to GENERATE_SERIES."""
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    """The DuckDB SQL dialect: tokenizer, parser, and generator settings."""

    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Convert a raw path literal into a JSON path, preserving pointer-style paths."""
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        # DuckDB has no hint comment syntax
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        # `^` is exponentiation in DuckDB, not bitwise XOR
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            """Parse a sample clause, filling in DuckDB's default sampling methods."""
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            """Parse bracket access, marking it for DuckDB's list-returning map semantics."""
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            """Parse MAP {..} literals as ToMap and MAP(keys, values) calls as Map."""
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            """Name pivot columns; multiple aggregations use DuckDB-style naming."""
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            """ISO-8601 strings cast directly to TIMESTAMPTZ in DuckDB."""
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            """Render StrToTime, using TRY_STRPTIME for null-on-failure ("safe") semantics."""
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            """Render StrToDate, using TRY_STRPTIME for null-on-failure ("safe") semantics."""
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            """Render ParseJSON; "safe" variants guard with json_valid instead of raising."""
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            """Render TimeFromParts as MAKE_TIME, folding nanoseconds into the seconds arg."""
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Render TimestampFromParts as MAKE_TIMESTAMP, folding milli/nano into seconds."""
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            """Render a sample clause, forcing RESERVOIR for discrete sample counts."""
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            """Render intervals; WEEK/QUARTER units are expanded into multiples of DAY."""
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            """UDF parameters are rendered as bare names, without column constraints."""
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render bracket access; map subscripts are unwrapped from the 1-element list."""
            this = expression.this
            if isinstance(this, exp.Array):
                # Parenthesize array literals so the subscript binds to the whole array
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    # DuckDB map access returns a list; take its first element
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                # Substitute NULL elements with the requested replacement before joining
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)
260class DuckDB(Dialect): 261 NULL_ORDERING = "nulls_are_last" 262 SUPPORTS_USER_DEFINED_TYPES = False 263 SAFE_DIVISION = True 264 INDEX_OFFSET = 1 265 CONCAT_COALESCE = True 266 SUPPORTS_ORDER_BY_ALL = True 267 SUPPORTS_FIXED_SIZE_ARRAYS = True 268 STRICT_JSON_PATH_SYNTAX = False 269 270 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 271 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 272 273 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 274 if isinstance(path, exp.Literal): 275 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 276 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 277 # This check ensures we'll avoid trying to parse these as JSON paths, which can 278 # either result in a noisy warning or in an invalid representation of the path. 279 path_text = path.name 280 if path_text.startswith("/") or "[#" in path_text: 281 return path 282 283 return super().to_json_path(path) 284 285 class Tokenizer(tokens.Tokenizer): 286 HEREDOC_STRINGS = ["$"] 287 288 HEREDOC_TAG_IS_IDENTIFIER = True 289 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 290 291 KEYWORDS = { 292 **tokens.Tokenizer.KEYWORDS, 293 "//": TokenType.DIV, 294 "**": TokenType.DSTAR, 295 "^@": TokenType.CARET_AT, 296 "@>": TokenType.AT_GT, 297 "<@": TokenType.LT_AT, 298 "ATTACH": TokenType.COMMAND, 299 "BINARY": TokenType.VARBINARY, 300 "BITSTRING": TokenType.BIT, 301 "BPCHAR": TokenType.TEXT, 302 "CHAR": TokenType.TEXT, 303 "CHARACTER VARYING": TokenType.TEXT, 304 "EXCLUDE": TokenType.EXCEPT, 305 "LOGICAL": TokenType.BOOLEAN, 306 "ONLY": TokenType.ONLY, 307 "PIVOT_WIDER": TokenType.PIVOT, 308 "POSITIONAL": TokenType.POSITIONAL, 309 "SIGNED": TokenType.INT, 310 "STRING": TokenType.TEXT, 311 "SUMMARIZE": TokenType.SUMMARIZE, 312 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 313 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 314 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 315 
"TIMESTAMP_US": TokenType.TIMESTAMP, 316 "UBIGINT": TokenType.UBIGINT, 317 "UINTEGER": TokenType.UINT, 318 "USMALLINT": TokenType.USMALLINT, 319 "UTINYINT": TokenType.UTINYINT, 320 "VARCHAR": TokenType.TEXT, 321 } 322 KEYWORDS.pop("/*+") 323 324 SINGLE_TOKENS = { 325 **tokens.Tokenizer.SINGLE_TOKENS, 326 "$": TokenType.PARAMETER, 327 } 328 329 class Parser(parser.Parser): 330 BITWISE = { 331 **parser.Parser.BITWISE, 332 TokenType.TILDA: exp.RegexpLike, 333 } 334 BITWISE.pop(TokenType.CARET) 335 336 RANGE_PARSERS = { 337 **parser.Parser.RANGE_PARSERS, 338 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 339 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 340 } 341 342 EXPONENT = { 343 **parser.Parser.EXPONENT, 344 TokenType.CARET: exp.Pow, 345 TokenType.DSTAR: exp.Pow, 346 } 347 348 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 349 350 FUNCTIONS = { 351 **parser.Parser.FUNCTIONS, 352 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 353 "ARRAY_SORT": exp.SortArray.from_arg_list, 354 "DATEDIFF": _build_date_diff, 355 "DATE_DIFF": _build_date_diff, 356 "DATE_TRUNC": date_trunc_to_time, 357 "DATETRUNC": date_trunc_to_time, 358 "DECODE": lambda args: exp.Decode( 359 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 360 ), 361 "ENCODE": lambda args: exp.Encode( 362 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 363 ), 364 "EPOCH": exp.TimeToUnix.from_arg_list, 365 "EPOCH_MS": lambda args: exp.UnixToTime( 366 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 367 ), 368 "JSON": exp.ParseJSON.from_arg_list, 369 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 370 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 371 "LIST_HAS": exp.ArrayContains.from_arg_list, 372 "LIST_REVERSE_SORT": _build_sort_array_desc, 373 "LIST_SORT": exp.SortArray.from_arg_list, 374 "LIST_VALUE": lambda args: exp.Array(expressions=args), 375 "MAKE_TIME": 
exp.TimeFromParts.from_arg_list, 376 "MAKE_TIMESTAMP": _build_make_timestamp, 377 "MEDIAN": lambda args: exp.PercentileCont( 378 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 379 ), 380 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 381 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 382 "REGEXP_EXTRACT": build_regexp_extract, 383 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 384 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 385 this=seq_get(args, 0), 386 expression=seq_get(args, 1), 387 replacement=seq_get(args, 2), 388 modifiers=seq_get(args, 3), 389 ), 390 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 391 "STRING_SPLIT": exp.Split.from_arg_list, 392 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 393 "STRING_TO_ARRAY": exp.Split.from_arg_list, 394 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 395 "STRUCT_PACK": exp.Struct.from_arg_list, 396 "STR_SPLIT": exp.Split.from_arg_list, 397 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 398 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 399 "UNNEST": exp.Explode.from_arg_list, 400 "XOR": binary_from_function(exp.BitwiseXor), 401 "GENERATE_SERIES": _build_generate_series(), 402 "RANGE": _build_generate_series(end_exclusive=True), 403 } 404 405 FUNCTIONS.pop("DATE_SUB") 406 FUNCTIONS.pop("GLOB") 407 408 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 409 FUNCTION_PARSERS.pop("DECODE") 410 411 NO_PAREN_FUNCTION_PARSERS = { 412 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 413 "MAP": lambda self: self._parse_map(), 414 } 415 416 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 417 TokenType.SEMI, 418 TokenType.ANTI, 419 } 420 421 PLACEHOLDER_PARSERS = { 422 **parser.Parser.PLACEHOLDER_PARSERS, 423 TokenType.PARAMETER: lambda self: ( 424 self.expression(exp.Placeholder, this=self._prev.text) 425 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 426 else None 427 ), 428 } 429 430 TYPE_CONVERTERS = { 431 # 
https://duckdb.org/docs/sql/data_types/numeric 432 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 433 # https://duckdb.org/docs/sql/data_types/text 434 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 435 } 436 437 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 438 # https://duckdb.org/docs/sql/samples.html 439 sample = super()._parse_table_sample(as_modifier=as_modifier) 440 if sample and not sample.args.get("method"): 441 if sample.args.get("size"): 442 sample.set("method", exp.var("RESERVOIR")) 443 else: 444 sample.set("method", exp.var("SYSTEM")) 445 446 return sample 447 448 def _parse_bracket( 449 self, this: t.Optional[exp.Expression] = None 450 ) -> t.Optional[exp.Expression]: 451 bracket = super()._parse_bracket(this) 452 if isinstance(bracket, exp.Bracket): 453 bracket.set("returns_list_for_maps", True) 454 455 return bracket 456 457 def _parse_map(self) -> exp.ToMap | exp.Map: 458 if self._match(TokenType.L_BRACE, advance=False): 459 return self.expression(exp.ToMap, this=self._parse_bracket()) 460 461 args = self._parse_wrapped_csv(self._parse_assignment) 462 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 463 464 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 465 return self._parse_field_def() 466 467 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 468 if len(aggregations) == 1: 469 return super()._pivot_column_names(aggregations) 470 return pivot_column_names(aggregations, dialect="duckdb") 471 472 class Generator(generator.Generator): 473 PARAMETER_TOKEN = "$" 474 NAMED_PLACEHOLDER_TOKEN = "$" 475 JOIN_HINTS = False 476 TABLE_HINTS = False 477 QUERY_HINTS = False 478 LIMIT_FETCH = "LIMIT" 479 STRUCT_DELIMITER = ("(", ")") 480 RENAME_TABLE_WITH_DB = False 481 NVL2_SUPPORTED = False 482 SEMI_ANTI_JOIN_WITH_SIDE = False 483 TABLESAMPLE_KEYWORDS = "USING 
SAMPLE" 484 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 485 LAST_DAY_SUPPORTS_DATE_PART = False 486 JSON_KEY_VALUE_PAIR_SEP = "," 487 IGNORE_NULLS_IN_FUNC = True 488 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 489 SUPPORTS_CREATE_TABLE_LIKE = False 490 MULTI_ARG_DISTINCT = False 491 CAN_IMPLEMENT_ARRAY_ANY = True 492 SUPPORTS_TO_NUMBER = False 493 COPY_HAS_INTO_KEYWORD = False 494 STAR_EXCEPT = "EXCLUDE" 495 PAD_FILL_PATTERN_IS_REQUIRED = True 496 ARRAY_CONCAT_IS_VAR_LEN = False 497 498 TRANSFORMS = { 499 **generator.Generator.TRANSFORMS, 500 exp.ApproxDistinct: approx_count_distinct_sql, 501 exp.Array: inline_array_unless_query, 502 exp.ArrayFilter: rename_func("LIST_FILTER"), 503 exp.ArraySize: rename_func("ARRAY_LENGTH"), 504 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 505 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 506 exp.ArraySort: _array_sort_sql, 507 exp.ArraySum: rename_func("LIST_SUM"), 508 exp.BitwiseXor: rename_func("XOR"), 509 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 510 exp.CurrentDate: lambda *_: "CURRENT_DATE", 511 exp.CurrentTime: lambda *_: "CURRENT_TIME", 512 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 513 exp.DayOfMonth: rename_func("DAYOFMONTH"), 514 exp.DayOfWeek: rename_func("DAYOFWEEK"), 515 exp.DayOfWeekIso: rename_func("ISODOW"), 516 exp.DayOfYear: rename_func("DAYOFYEAR"), 517 exp.DataType: _datatype_sql, 518 exp.Date: _date_sql, 519 exp.DateAdd: _date_delta_sql, 520 exp.DateFromParts: rename_func("MAKE_DATE"), 521 exp.DateSub: _date_delta_sql, 522 exp.DateDiff: _date_diff_sql, 523 exp.DateStrToDate: datestrtodate_sql, 524 exp.Datetime: no_datetime_sql, 525 exp.DatetimeSub: _date_delta_sql, 526 exp.DatetimeAdd: _date_delta_sql, 527 exp.DateToDi: lambda self, 528 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 529 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 530 exp.DiToDate: lambda self, 531 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS 
TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 532 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 533 exp.GenerateDateArray: _generate_datetime_array_sql, 534 exp.GenerateTimestampArray: _generate_datetime_array_sql, 535 exp.Explode: rename_func("UNNEST"), 536 exp.IntDiv: lambda self, e: self.binary(e, "//"), 537 exp.IsInf: rename_func("ISINF"), 538 exp.IsNan: rename_func("ISNAN"), 539 exp.JSONExtract: _arrow_json_extract_sql, 540 exp.JSONExtractScalar: _arrow_json_extract_sql, 541 exp.JSONFormat: _json_format_sql, 542 exp.LogicalOr: rename_func("BOOL_OR"), 543 exp.LogicalAnd: rename_func("BOOL_AND"), 544 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 545 exp.MonthsBetween: lambda self, e: self.func( 546 "DATEDIFF", 547 "'month'", 548 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 549 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 550 ), 551 exp.PercentileCont: rename_func("QUANTILE_CONT"), 552 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 553 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
554 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 555 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 556 exp.RegexpExtract: regexp_extract_sql, 557 exp.RegexpReplace: lambda self, e: self.func( 558 "REGEXP_REPLACE", 559 e.this, 560 e.expression, 561 e.args.get("replacement"), 562 e.args.get("modifiers"), 563 ), 564 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 565 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 566 exp.Return: lambda self, e: self.sql(e, "this"), 567 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 568 exp.Rand: rename_func("RANDOM"), 569 exp.SafeDivide: no_safe_divide_sql, 570 exp.SHA: rename_func("SHA1"), 571 exp.SHA2: sha256_sql, 572 exp.Split: rename_func("STR_SPLIT"), 573 exp.SortArray: _sort_array_sql, 574 exp.StrPosition: str_position_sql, 575 exp.StrToUnix: lambda self, e: self.func( 576 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 577 ), 578 exp.Struct: _struct_sql, 579 exp.Transform: rename_func("LIST_TRANSFORM"), 580 exp.TimeAdd: _date_delta_sql, 581 exp.Time: no_time_sql, 582 exp.TimeDiff: _timediff_sql, 583 exp.Timestamp: no_timestamp_sql, 584 exp.TimestampDiff: lambda self, e: self.func( 585 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 586 ), 587 exp.TimestampTrunc: timestamptrunc_sql(), 588 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 589 exp.TimeStrToTime: timestrtotime_sql, 590 exp.TimeStrToUnix: lambda self, e: self.func( 591 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 592 ), 593 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 594 exp.TimeToUnix: rename_func("EPOCH"), 595 exp.TsOrDiToDi: lambda self, 596 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 597 exp.TsOrDsAdd: _date_delta_sql, 598 exp.TsOrDsDiff: lambda self, e: 
self.func( 599 "DATE_DIFF", 600 f"'{e.args.get('unit') or 'DAY'}'", 601 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 602 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 603 ), 604 exp.UnixToStr: lambda self, e: self.func( 605 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 606 ), 607 exp.DatetimeTrunc: lambda self, e: self.func( 608 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 609 ), 610 exp.UnixToTime: _unix_to_time_sql, 611 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 612 exp.VariancePop: rename_func("VAR_POP"), 613 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 614 exp.Xor: bool_xor_sql, 615 } 616 617 SUPPORTED_JSON_PATH_PARTS = { 618 exp.JSONPathKey, 619 exp.JSONPathRoot, 620 exp.JSONPathSubscript, 621 exp.JSONPathWildcard, 622 } 623 624 TYPE_MAPPING = { 625 **generator.Generator.TYPE_MAPPING, 626 exp.DataType.Type.BINARY: "BLOB", 627 exp.DataType.Type.BPCHAR: "TEXT", 628 exp.DataType.Type.CHAR: "TEXT", 629 exp.DataType.Type.FLOAT: "REAL", 630 exp.DataType.Type.NCHAR: "TEXT", 631 exp.DataType.Type.NVARCHAR: "TEXT", 632 exp.DataType.Type.UINT: "UINTEGER", 633 exp.DataType.Type.VARBINARY: "BLOB", 634 exp.DataType.Type.ROWVERSION: "BLOB", 635 exp.DataType.Type.VARCHAR: "TEXT", 636 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 637 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 638 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 639 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 640 } 641 642 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 643 RESERVED_KEYWORDS = { 644 "array", 645 "analyse", 646 "union", 647 "all", 648 "when", 649 "in_p", 650 "default", 651 "create_p", 652 "window", 653 "asymmetric", 654 "to", 655 "else", 656 "localtime", 657 "from", 658 "end_p", 659 "select", 660 "current_date", 661 "foreign", 662 "with", 663 "grant", 664 "session_user", 665 "or", 666 
"except", 667 "references", 668 "fetch", 669 "limit", 670 "group_p", 671 "leading", 672 "into", 673 "collate", 674 "offset", 675 "do", 676 "then", 677 "localtimestamp", 678 "check_p", 679 "lateral_p", 680 "current_role", 681 "where", 682 "asc_p", 683 "placing", 684 "desc_p", 685 "user", 686 "unique", 687 "initially", 688 "column", 689 "both", 690 "some", 691 "as", 692 "any", 693 "only", 694 "deferrable", 695 "null_p", 696 "current_time", 697 "true_p", 698 "table", 699 "case", 700 "trailing", 701 "variadic", 702 "for", 703 "on", 704 "distinct", 705 "false_p", 706 "not", 707 "constraint", 708 "current_timestamp", 709 "returning", 710 "primary", 711 "intersect", 712 "having", 713 "analyze", 714 "current_user", 715 "and", 716 "cast", 717 "symmetric", 718 "using", 719 "order", 720 "current_catalog", 721 } 722 723 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 724 725 # DuckDB doesn't generally support CREATE TABLE .. properties 726 # https://duckdb.org/docs/sql/statements/create_table.html 727 PROPERTIES_LOCATION = { 728 prop: exp.Properties.Location.UNSUPPORTED 729 for prop in generator.Generator.PROPERTIES_LOCATION 730 } 731 732 # There are a few exceptions (e.g. 
temporary tables) which are supported or 733 # can be transpiled to DuckDB, so we explicitly override them accordingly 734 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 735 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 736 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 737 738 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 739 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 740 741 def strtotime_sql(self, expression: exp.StrToTime) -> str: 742 if expression.args.get("safe"): 743 formatted_time = self.format_time(expression) 744 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 745 return str_to_time_sql(self, expression) 746 747 def strtodate_sql(self, expression: exp.StrToDate) -> str: 748 if expression.args.get("safe"): 749 formatted_time = self.format_time(expression) 750 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 751 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 752 753 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 754 arg = expression.this 755 if expression.args.get("safe"): 756 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 757 return self.func("JSON", arg) 758 759 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 760 nano = expression.args.get("nano") 761 if nano is not None: 762 expression.set( 763 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 764 ) 765 766 return rename_func("MAKE_TIME")(self, expression) 767 768 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 769 sec = expression.args["sec"] 770 771 milli = expression.args.get("milli") 772 if milli is not None: 773 sec += milli.pop() / exp.Literal.number(1000.0) 774 775 nano = expression.args.get("nano") 776 if nano is not None: 
777 sec += nano.pop() / exp.Literal.number(1000000000.0) 778 779 if milli or nano: 780 expression.set("sec", sec) 781 782 return rename_func("MAKE_TIMESTAMP")(self, expression) 783 784 def tablesample_sql( 785 self, 786 expression: exp.TableSample, 787 tablesample_keyword: t.Optional[str] = None, 788 ) -> str: 789 if not isinstance(expression.parent, exp.Select): 790 # This sample clause only applies to a single source, not the entire resulting relation 791 tablesample_keyword = "TABLESAMPLE" 792 793 if expression.args.get("size"): 794 method = expression.args.get("method") 795 if method and method.name.upper() != "RESERVOIR": 796 self.unsupported( 797 f"Sampling method {method} is not supported with a discrete sample count, " 798 "defaulting to reservoir sampling" 799 ) 800 expression.set("method", exp.var("RESERVOIR")) 801 802 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 803 804 def interval_sql(self, expression: exp.Interval) -> str: 805 multiplier: t.Optional[int] = None 806 unit = expression.text("unit").lower() 807 808 if unit.startswith("week"): 809 multiplier = 7 810 if unit.startswith("quarter"): 811 multiplier = 90 812 813 if multiplier: 814 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 815 816 return super().interval_sql(expression) 817 818 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 819 if isinstance(expression.parent, exp.UserDefinedFunction): 820 return self.sql(expression, "this") 821 return super().columndef_sql(expression, sep) 822 823 def join_sql(self, expression: exp.Join) -> str: 824 if ( 825 expression.side == "LEFT" 826 and not expression.args.get("on") 827 and isinstance(expression.this, exp.Unnest) 828 ): 829 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 830 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 831 return 
super().join_sql(expression.on(exp.true())) 832 833 return super().join_sql(expression) 834 835 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 836 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 837 if expression.args.get("is_end_exclusive"): 838 return rename_func("RANGE")(self, expression) 839 840 return self.function_fallback_sql(expression) 841 842 def bracket_sql(self, expression: exp.Bracket) -> str: 843 this = expression.this 844 if isinstance(this, exp.Array): 845 this.replace(exp.paren(this)) 846 847 bracket = super().bracket_sql(expression) 848 849 if not expression.args.get("returns_list_for_maps"): 850 if not this.type: 851 from sqlglot.optimizer.annotate_types import annotate_types 852 853 this = annotate_types(this) 854 855 if this.is_type(exp.DataType.Type.MAP): 856 bracket = f"({bracket})[1]" 857 858 return bracket 859 860 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 861 expression_sql = self.sql(expression, "expression") 862 863 func = expression.this 864 if isinstance(func, exp.PERCENTILES): 865 # Make the order key the first arg and slide the fraction to the right 866 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 867 order_col = expression.find(exp.Ordered) 868 if order_col: 869 func.set("expression", func.this) 870 func.set("this", order_col.this) 871 872 this = self.sql(expression, "this").rstrip(")") 873 874 return f"{this}{expression_sql})" 875 876 def length_sql(self, expression: exp.Length) -> str: 877 arg = expression.this 878 879 # Dialects like BQ and Snowflake also accept binary values as args, so 880 # DDB will attempt to infer the type or resort to case/when resolution 881 if not expression.args.get("binary") or arg.is_string: 882 return self.func("LENGTH", arg) 883 884 if not arg.type: 885 from sqlglot.optimizer.annotate_types import annotate_types 886 887 arg = annotate_types(arg) 888 889 if arg.is_type(*exp.DataType.TEXT_TYPES): 890 return 
self.func("LENGTH", arg) 891 892 # We need these casts to make duckdb's static type checker happy 893 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 894 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 895 896 case = ( 897 exp.case(self.func("TYPEOF", arg)) 898 .when( 899 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 900 ) # anonymous to break length_sql recursion 901 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 902 ) 903 904 return self.sql(case) 905 906 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 907 this = expression.this 908 key = expression.args.get("key") 909 key_sql = key.name if isinstance(key, exp.Expression) else "" 910 value_sql = self.sql(expression, "value") 911 912 kv_sql = f"{key_sql} := {value_sql}" 913 914 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 915 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 916 if isinstance(this, exp.Struct) and not this.expressions: 917 return self.func("STRUCT_PACK", kv_sql) 918 919 return self.func("STRUCT_INSERT", this, kv_sql) 920 921 def unnest_sql(self, expression: exp.Unnest) -> str: 922 explode_array = expression.args.get("explode_array") 923 if explode_array: 924 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 925 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 926 expression.expressions.append( 927 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 928 ) 929 930 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 931 alias = expression.args.get("alias") 932 if alias: 933 expression.set("alias", None) 934 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 935 936 unnest_sql = super().unnest_sql(expression) 937 select = 
exp.Select(expressions=[unnest_sql]).subquery(alias) 938 return self.sql(select) 939 940 return super().unnest_sql(expression) 941 942 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 943 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 944 # DuckDB should render IGNORE NULLS only for the general-purpose 945 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 946 return super().ignorenulls_sql(expression) 947 948 return self.sql(expression, "this") 949 950 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 951 this = self.sql(expression, "this") 952 null_text = self.sql(expression, "null") 953 954 if null_text: 955 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 956 957 return self.func("ARRAY_TO_STRING", this, expression.expression)
Default `NULL` ordering method to use if not explicitly set.
Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
A `NULL` argument in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string instead.
Whether `ORDER BY ALL` is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as `x::INT[5]` should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this syntax is instead interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
273 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 274 if isinstance(path, exp.Literal): 275 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 276 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 277 # This check ensures we'll avoid trying to parse these as JSON paths, which can 278 # either result in a noisy warning or in an invalid representation of the path. 279 path_text = path.name 280 if path_text.startswith("/") or "[#" in path_text: 281 return path 282 283 return super().to_json_path(path)
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
285 class Tokenizer(tokens.Tokenizer): 286 HEREDOC_STRINGS = ["$"] 287 288 HEREDOC_TAG_IS_IDENTIFIER = True 289 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 290 291 KEYWORDS = { 292 **tokens.Tokenizer.KEYWORDS, 293 "//": TokenType.DIV, 294 "**": TokenType.DSTAR, 295 "^@": TokenType.CARET_AT, 296 "@>": TokenType.AT_GT, 297 "<@": TokenType.LT_AT, 298 "ATTACH": TokenType.COMMAND, 299 "BINARY": TokenType.VARBINARY, 300 "BITSTRING": TokenType.BIT, 301 "BPCHAR": TokenType.TEXT, 302 "CHAR": TokenType.TEXT, 303 "CHARACTER VARYING": TokenType.TEXT, 304 "EXCLUDE": TokenType.EXCEPT, 305 "LOGICAL": TokenType.BOOLEAN, 306 "ONLY": TokenType.ONLY, 307 "PIVOT_WIDER": TokenType.PIVOT, 308 "POSITIONAL": TokenType.POSITIONAL, 309 "SIGNED": TokenType.INT, 310 "STRING": TokenType.TEXT, 311 "SUMMARIZE": TokenType.SUMMARIZE, 312 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 313 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 314 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 315 "TIMESTAMP_US": TokenType.TIMESTAMP, 316 "UBIGINT": TokenType.UBIGINT, 317 "UINTEGER": TokenType.UINT, 318 "USMALLINT": TokenType.USMALLINT, 319 "UTINYINT": TokenType.UTINYINT, 320 "VARCHAR": TokenType.TEXT, 321 } 322 KEYWORDS.pop("/*+") 323 324 SINGLE_TOKENS = { 325 **tokens.Tokenizer.SINGLE_TOKENS, 326 "$": TokenType.PARAMETER, 327 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser for DuckDB's SQL dialect.

    Remaps the base token tables and the function-name registry so DuckDB's
    operators (`~` regex match, `^`/`**` exponentiation, `&&` array overlap,
    `^@` starts-with) and function spellings parse into the corresponding
    sqlglot expressions.
    """

    # In DuckDB `~` is a regex-match operator, and `^` is exponentiation,
    # so `^` is removed from the bitwise table and registered in EXPONENT below.
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }
    BITWISE.pop(TokenType.CARET)

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),  # `&&`
        TokenType.CARET_AT: binary_range_parser(exp.StartsWith),  # `^@`
    }

    # DuckDB accepts both `^` and `**` for POW
    EXPONENT = {
        **parser.Parser.EXPONENT,
        TokenType.CARET: exp.Pow,
        TokenType.DSTAR: exp.Pow,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    # DuckDB function names -> sqlglot expression builders
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        # DECODE/ENCODE here are the charset variants, not CASE-like DECODE
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        # MEDIAN(x) == continuous 0.5 percentile
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": build_regexp_extract,
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        # GENERATE_SERIES(a, b) is end-inclusive, RANGE(a, b) is end-exclusive
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")
    FUNCTIONS.pop("GLOB")

    # Drop the base CASE-like DECODE parser; DuckDB's DECODE is handled in FUNCTIONS
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    # SEMI/ANTI are join kinds in DuckDB, so they can't be table aliases
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # `$1` / `$name` style placeholders
    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a sample clause, filling in DuckDB's implicit sampling method.

        https://duckdb.org/docs/sql/samples.html
        """
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            # Row-count samples default to RESERVOIR, percentage samples to SYSTEM
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        # Tag brackets parsed in DuckDB so the generator knows map subscripts
        # produce lists here (see Generator.bracket_sql)
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        # `MAP {k: v}` literal syntax vs `MAP(keys, values)` function syntax
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        # Struct members are parsed like column definitions (name + type)
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # A single unaliased aggregation keeps the default naming; multiple
        # aggregations get DuckDB-style generated pivot column names
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generator that renders sqlglot syntax trees as DuckDB SQL."""

    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False

    # Expression type -> SQL rendering override for DuckDB
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
        exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SafeDivide: no_safe_divide_sql,
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: str_position_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        # An ISO-8601 string can simply be cast to TIMESTAMPTZ in DuckDB
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        # "safe" parsing maps to TRY_STRPTIME (NULL on failure) instead of STRPTIME
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        # Parse with (TRY_)STRPTIME, then cast the resulting timestamp to DATE
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        # "safe" parse: NULL out invalid JSON via CASE WHEN JSON_VALID(...)
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        # MAKE_TIME has no nanosecond arg; fold nanos into fractional seconds
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        # Fold milli/nano parts into the seconds argument for MAKE_TIMESTAMP
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                # Only reservoir sampling supports a fixed row count in DuckDB
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def interval_sql(self, expression: exp.Interval) -> str:
        # DuckDB lacks WEEK/QUARTER interval units; emit a scaled DAY interval
        multiplier: t.Optional[int] = None
        unit = expression.text("unit").lower()

        if unit.startswith("week"):
            multiplier = 7
        if unit.startswith("quarter"):
            multiplier = 90

        if multiplier:
            return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

        return super().interval_sql(expression)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        # UDF parameters are rendered as bare names, without types/constraints
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        if isinstance(this, exp.Array):
            # Array literals must be parenthesized before subscripting
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(exp.DataType.Type.MAP):
                # Unwrap the single-element list DuckDB returns for map[key]
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the aggregate's closing paren and splice the order SQL inside
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when(
                "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        if null_text:
            # Substitute NULL elements first, since ARRAY_TO_STRING skips them
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Render StrToDate as a (TRY_)STRPTIME call cast to DATE."""
    if expression.args.get("safe"):
        # "safe" parsing: TRY_STRPTIME yields NULL instead of erroring
        fmt = self.format_time(expression)
        parsed = self.func("TRY_STRPTIME", expression.this, fmt)
        return f"CAST({parsed} AS DATE)"

    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    """Render TimeFromParts as MAKE_TIME, folding nanoseconds into seconds.

    MAKE_TIME has no nanosecond argument, so a `nano` arg (if present) is
    removed and added to `sec` as a fractional component.
    """
    nano = expression.args.get("nano")
    if nano is not None:
        fractional = nano.pop() / exp.Literal.number(1000000000.0)
        expression.set("sec", expression.args["sec"] + fractional)

    return rename_func("MAKE_TIME")(self, expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TimestampFromParts as MAKE_TIMESTAMP.

    MAKE_TIMESTAMP has no milli/nano arguments, so both (when present) are
    removed and folded into the seconds argument as fractional components.
    """
    seconds = expression.args["sec"]

    millis = expression.args.get("milli")
    if millis is not None:
        seconds += millis.pop() / exp.Literal.number(1000.0)

    nanos = expression.args.get("nano")
    if nanos is not None:
        seconds += nanos.pop() / exp.Literal.number(1000000000.0)

    if millis or nanos:
        expression.set("sec", seconds)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
784 def tablesample_sql( 785 self, 786 expression: exp.TableSample, 787 tablesample_keyword: t.Optional[str] = None, 788 ) -> str: 789 if not isinstance(expression.parent, exp.Select): 790 # This sample clause only applies to a single source, not the entire resulting relation 791 tablesample_keyword = "TABLESAMPLE" 792 793 if expression.args.get("size"): 794 method = expression.args.get("method") 795 if method and method.name.upper() != "RESERVOIR": 796 self.unsupported( 797 f"Sampling method {method} is not supported with a discrete sample count, " 798 "defaulting to reservoir sampling" 799 ) 800 expression.set("method", exp.var("RESERVOIR")) 801 802 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
def interval_sql(self, expression: exp.Interval) -> str:
    """Render an interval, rewriting WEEK/QUARTER units as scaled DAY intervals."""
    unit = expression.text("unit").lower()

    scale: t.Optional[int] = None
    if unit.startswith("week"):
        scale = 7
    elif unit.startswith("quarter"):
        scale = 90

    if scale:
        day_interval = exp.Interval(this=expression.this, unit=exp.var("DAY"))
        return f"({scale} * {super().interval_sql(day_interval)})"

    return super().interval_sql(expression)
def join_sql(self, expression: exp.Join) -> str:
    """Render a join, supplying a dummy ON clause for LEFT JOIN UNNEST(...).

    Some dialects allow `LEFT JOIN UNNEST(...)` with no ON clause; DuckDB
    requires one, so an always-true condition is added, which preserves the
    join's semantics.
    """
    is_bare_left_unnest = (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    )
    if is_bare_left_unnest:
        expression = expression.on(exp.true())

    return super().join_sql(expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render a subscript, parenthesizing array literals and unwrapping map hits.

    Array literals must be wrapped before subscripting. When the bracket was
    not parsed with `returns_list_for_maps`, a MAP subscript's result is
    unwrapped with `[1]` (the subscript otherwise yields a one-element list).
    """
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    rendered = super().bracket_sql(expression)

    if expression.args.get("returns_list_for_maps"):
        return rendered

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this)

    if this.is_type(exp.DataType.Type.MAP):
        rendered = f"({rendered})[1]"

    return rendered
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    """Inline WITHIN GROUP ordering into DuckDB's ordered-set aggregate form."""
    order_sql = self.sql(expression, "expression")

    agg = expression.this
    if isinstance(agg, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        ordered = expression.find(exp.Ordered)
        if ordered:
            agg.set("expression", agg.this)
            agg.set("this", ordered.this)

    # Strip the aggregate's closing paren and splice the order SQL inside it
    opened = self.sql(expression, "this").rstrip(")")
    return f"{opened}{order_sql})"
def length_sql(self, expression: exp.Length) -> str:
    """Render LENGTH, dispatching on TYPEOF when the arg may be binary.

    Dialects like BigQuery and Snowflake accept binary args too, so when the
    type can't be inferred statically a CASE on TYPEOF picks LENGTH vs
    OCTET_LENGTH at runtime.
    """
    arg = expression.this

    # Non-binary usage, or a known string literal: plain LENGTH
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # These casts keep duckdb's static type checker happy
    as_blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    as_text = exp.cast(arg, exp.DataType.Type.VARCHAR)

    dispatch = (
        exp.case(self.func("TYPEOF", arg))
        .when(
            # Anonymous call avoids re-entering length_sql recursively
            "'VARCHAR'",
            exp.Anonymous(this="LENGTH", expressions=[as_text]),
        )
        .when("'BLOB'", self.func("OCTET_LENGTH", as_blob))
    )

    return self.sql(dispatch)
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    """Render ObjectInsert as STRUCT_INSERT (or STRUCT_PACK for empty structs)."""
    struct = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    kv_sql = f"{key_sql} := {self.sql(expression, 'value')}"

    # STRUCT_INSERT({}, k := v) is not valid DuckDB, so an empty input struct
    # (e.g. Snowflake's OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value)) is
    # built from scratch with STRUCT_PACK instead
    if isinstance(struct, exp.Struct) and not struct.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", struct, kv_sql)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Render UNNEST, emulating BigQuery's exploding UNNEST when requested.

    With `explode_array` set, "FROM UNNEST(...)" becomes
    "FROM (SELECT UNNEST(..., max_depth => 2))" so the top-level array and
    struct are both exploded, matching BigQuery's behavior.
    """
    if expression.args.get("explode_array"):
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # A BigQuery column alias becomes a table alias on the DuckDB subquery
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        inner = super().unnest_sql(expression)
        return self.sql(exp.Select(expressions=[inner]).subquery(alias))

    return super().unnest_sql(expression)
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    """Emit IGNORE NULLS only for the window functions DuckDB accepts it on,
    e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...); otherwise drop the modifier."""
    if not isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
        return self.sql(expression, "this")

    return super().ignorenulls_sql(expression)
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    """Render ArrayToString as ARRAY_TO_STRING, substituting NULLs first."""
    array_sql = self.sql(expression, "this")

    null_text = self.sql(expression, "null")
    if null_text:
        # Replace NULL elements up front, since ARRAY_TO_STRING skips them
        array_sql = f"LIST_TRANSFORM({array_sql}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", array_sql, expression.expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql