sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, jsonpath, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 build_timetostr_or_tochar, 10 binary_from_function, 11 build_default_decimal_type, 12 build_replace_with_optional_replacement, 13 build_timestamp_from_parts, 14 date_delta_sql, 15 date_trunc_to_time, 16 datestrtodate_sql, 17 build_formatted_time, 18 if_sql, 19 inline_array_sql, 20 max_or_greatest, 21 min_or_least, 22 rename_func, 23 timestamptrunc_sql, 24 timestrtotime_sql, 25 var_map_sql, 26 map_date_part, 27 no_timestamp_sql, 28 strposition_sql, 29 timestampdiff_sql, 30 no_make_interval_sql, 31 groupconcat_sql, 32) 33from sqlglot.generator import unsupported_args 34from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get 35from sqlglot.optimizer.scope import build_scope, find_all_in_scope 36from sqlglot.tokens import TokenType 37 38if t.TYPE_CHECKING: 39 from sqlglot._typing import E, B 40 41 42# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 43def _build_datetime( 44 name: str, kind: exp.DataType.Type, safe: bool = False 45) -> t.Callable[[t.List], exp.Func]: 46 def _builder(args: t.List) -> exp.Func: 47 value = seq_get(args, 0) 48 scale_or_fmt = seq_get(args, 1) 49 50 int_value = value is not None and is_int(value.name) 51 int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int 52 53 if isinstance(value, exp.Literal) or (value and scale_or_fmt): 54 # Converts calls like `TO_TIME('01:02:03')` into casts 55 if len(args) == 1 and value.is_string and not int_value: 56 return ( 57 exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True) 58 if safe 59 else exp.cast(value, kind) 60 ) 61 62 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 63 # cases so we can transpile them, since they're relatively common 64 if kind == exp.DataType.Type.TIMESTAMP: 65 if not safe and (int_value or int_scale_or_fmt): 66 # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as 67 # it's not easily transpilable 68 return exp.UnixToTime(this=value, scale=scale_or_fmt) 69 if not int_scale_or_fmt and not is_float(value.name): 70 expr = build_formatted_time(exp.StrToTime, "snowflake")(args) 71 expr.set("safe", safe) 72 return expr 73 74 if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value: 75 klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime 76 formatted_exp = build_formatted_time(klass, "snowflake")(args) 77 formatted_exp.set("safe", safe) 78 return formatted_exp 79 80 return exp.Anonymous(this=name, expressions=args) 81 82 return _builder 83 84 85def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 86 expression = parser.build_var_map(args) 87 88 if isinstance(expression, exp.StarMap): 89 return expression 90 91 return exp.Struct( 92 expressions=[ 93 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 94 ] 95 ) 96 97 98def _build_datediff(args: t.List) -> exp.DateDiff: 99 return exp.DateDiff( 100 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 101 ) 102 103 104def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 105 def _builder(args: t.List) -> E: 106 return expr_type( 107 this=seq_get(args, 2), 108 expression=seq_get(args, 1), 109 unit=map_date_part(seq_get(args, 0)), 110 ) 111 112 return _builder 113 114 115def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]: 116 def _builder(args: t.List) -> B | exp.Anonymous: 117 if len(args) == 3: 118 return exp.Anonymous(this=name, expressions=args) 119 120 return binary_from_function(expr_type)(args) 121 122 return _builder 123 124 125# https://docs.snowflake.com/en/sql-reference/functions/div0 126def _build_if_from_div0(args: t.List) -> exp.If: 127 lhs = exp._wrap(seq_get(args, 0), exp.Binary) 128 rhs = exp._wrap(seq_get(args, 1), exp.Binary) 129 130 cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_( 131 exp.Is(this=lhs, expression=exp.null()).not_() 132 ) 133 true = exp.Literal.number(0) 134 false = exp.Div(this=lhs, expression=rhs) 135 return exp.If(this=cond, true=true, false=false) 136 137 138# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 139def _build_if_from_zeroifnull(args: t.List) -> exp.If: 140 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 141 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 142 143 144# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 145def _build_if_from_nullifzero(args: t.List) -> exp.If: 146 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 147 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 148 149 150def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 151 flag = expression.text("flag") 152 153 if "i" not in flag: 154 flag += "i" 155 156 return self.func( 157 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 158 ) 159 160 161def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 162 regexp_replace = exp.RegexpReplace.from_arg_list(args) 163 164 if not regexp_replace.args.get("replacement"): 165 regexp_replace.set("replacement", exp.Literal.string("")) 166 167 return regexp_replace 168 169 170def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 171 def _parse(self: Snowflake.Parser) -> exp.Show: 172 return self._parse_show_snowflake(*args, **kwargs) 173 174 return _parse 175 176 177def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 178 trunc = date_trunc_to_time(args) 179 trunc.set("unit", map_date_part(trunc.args["unit"])) 180 return trunc 181 182 183def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression: 184 """ 185 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 186 so we need to unqualify them. Same goes for ANY ORDER BY <column>. 187 188 Example: 189 >>> from sqlglot import parse_one 190 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 191 >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake")) 192 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 193 """ 194 if isinstance(expression, exp.Pivot): 195 if expression.unpivot: 196 expression = transforms.unqualify_columns(expression) 197 else: 198 for field in expression.fields: 199 field_expr = seq_get(field.expressions if field else [], 0) 200 201 if isinstance(field_expr, exp.PivotAny): 202 unqualified_field_expr = transforms.unqualify_columns(field_expr) 203 t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0) 204 205 return expression 206 207 208def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 209 assert isinstance(expression, exp.Create) 210 211 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 212 if expression.this in exp.DataType.NESTED_TYPES: 213 expression.set("expressions", None) 214 return expression 215 216 props = expression.args.get("properties") 217 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 218 for schema_expression in expression.this.expressions: 219 if isinstance(schema_expression, exp.ColumnDef): 220 column_type = schema_expression.kind 221 if isinstance(column_type, exp.DataType): 222 column_type.transform(_flatten_structured_type, copy=False) 223 224 return expression 225 226 227def _unnest_generate_date_array(unnest: exp.Unnest) -> None: 228 generate_date_array = unnest.expressions[0] 229 start = generate_date_array.args.get("start") 230 end = generate_date_array.args.get("end") 231 step = generate_date_array.args.get("step") 232 233 if not start or not end or not isinstance(step, exp.Interval) or step.name != "1": 234 return 235 236 unit = step.args.get("unit") 237 238 unnest_alias = unnest.args.get("alias") 239 if unnest_alias: 240 unnest_alias = unnest_alias.copy() 241 sequence_value_name = seq_get(unnest_alias.columns, 0) or "value" 242 else: 243 sequence_value_name = "value" 244 245 # We'll add the next sequence value to the starting date and project the result 246 date_add = _build_date_time_add(exp.DateAdd)( 247 [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")] 248 ) 249 250 # We use DATEDIFF to compute the number of sequence values needed 251 number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"]( 252 [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1] 253 ) 254 255 unnest.set("expressions", [number_sequence]) 256 257 unnest_parent = unnest.parent 258 if isinstance(unnest_parent, exp.Join): 259 select = unnest_parent.parent 260 if isinstance(select, exp.Select): 261 replace_column_name = ( 262 sequence_value_name 263 if isinstance(sequence_value_name, str) 264 else sequence_value_name.name 265 ) 266 267 scope = build_scope(select) 268 if scope: 269 for column in scope.columns: 270 if column.name.lower() == replace_column_name.lower(): 271 column.replace( 272 date_add.as_(replace_column_name) 273 if isinstance(column.parent, exp.Select) 274 else date_add 275 ) 276 277 lateral = exp.Lateral(this=unnest_parent.this.pop()) 278 unnest_parent.replace(exp.Join(this=lateral)) 279 else: 280 unnest.replace( 281 exp.select(date_add.as_(sequence_value_name)) 282 .from_(unnest.copy()) 283 .subquery(unnest_alias) 284 ) 285 286 287def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression: 288 if isinstance(expression, exp.Select): 289 for generate_date_array in expression.find_all(exp.GenerateDateArray): 290 parent = generate_date_array.parent 291 292 # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake 293 # query is the following (it'll be unnested properly on the next iteration due to copy): 294 # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...)))) 295 if not isinstance(parent, exp.Unnest): 296 unnest = exp.Unnest(expressions=[generate_date_array.copy()]) 297 generate_date_array.replace( 298 exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery() 299 ) 300 301 if ( 302 isinstance(parent, exp.Unnest) 303 and isinstance(parent.parent, (exp.From, exp.Join)) 304 and len(parent.expressions) == 1 305 ): 306 _unnest_generate_date_array(parent) 307 308 return expression 309 310 311def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 312 def _builder(args: t.List) -> E: 313 return expr_type( 314 this=seq_get(args, 0), 315 expression=seq_get(args, 1), 316 position=seq_get(args, 2), 317 occurrence=seq_get(args, 3), 318 parameters=seq_get(args, 4), 319 group=seq_get(args, 5) or exp.Literal.number(0), 320 ) 321 322 return _builder 323 324 325def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 326 # Other dialects don't support all of the following parameters, so we need to 327 # generate default values as necessary to ensure the transpilation is correct 328 group = expression.args.get("group") 329 330 # To avoid generating all these default values, we set group to None if 331 # it's 0 (also default value) which doesn't trigger the following chain 332 if group and group.name == "0": 333 group = None 334 335 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 336 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 337 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 338 339 return self.func( 340 "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL", 341 expression.this, 342 expression.expression, 343 position, 344 occurrence, 345 parameters, 346 group, 347 ) 348 349 350def _json_extract_value_array_sql( 351 self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 352) -> str: 353 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 354 ident = exp.to_identifier("x") 355 356 if isinstance(expression, exp.JSONValueArray): 357 this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR) 358 else: 359 this = exp.ParseJSON(this=f"TO_JSON({ident})") 360 361 transform_lambda = exp.Lambda(expressions=[ident], this=this) 362 363 return self.func("TRANSFORM", json_extract, transform_lambda) 364 365 366def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression: 367 if isinstance(expression, exp.Select): 368 scope = build_scope(expression) 369 if not scope: 370 return expression 371 372 unnests = list(scope.find_all(exp.Unnest)) 373 374 if not unnests: 375 return expression 376 377 taken_source_names = set(scope.sources) 378 column_source: t.Dict[str, exp.Identifier] = {} 379 380 unnest_identifier: t.Optional[exp.Identifier] = None 381 orig_expression = expression.copy() 382 383 for unnest in unnests: 384 if not isinstance(unnest.parent, (exp.From, exp.Join)): 385 continue 386 387 # Try to infer column names produced by an unnest operator. This is only possible 388 # when we can peek into the (statically known) contents of the unnested value. 389 unnest_columns: t.Set[str] = set() 390 for unnest_expr in unnest.expressions: 391 if not isinstance(unnest_expr, exp.Array): 392 continue 393 394 for array_expr in unnest_expr.expressions: 395 if not ( 396 isinstance(array_expr, exp.Struct) 397 and array_expr.expressions 398 and all( 399 isinstance(struct_expr, exp.PropertyEQ) 400 for struct_expr in array_expr.expressions 401 ) 402 ): 403 continue 404 405 unnest_columns.update( 406 struct_expr.this.name.lower() for struct_expr in array_expr.expressions 407 ) 408 break 409 410 if unnest_columns: 411 break 412 413 unnest_alias = unnest.args.get("alias") 414 if not unnest_alias: 415 alias_name = find_new_name(taken_source_names, "value") 416 taken_source_names.add(alias_name) 417 418 # Produce a `TableAlias` AST similar to what is produced for BigQuery. This 419 # will be corrected later, when we generate SQL for the `Unnest` AST node. 420 aliased_unnest = exp.alias_(unnest, None, table=[alias_name]) 421 scope.replace(unnest, aliased_unnest) 422 423 unnest_identifier = aliased_unnest.args["alias"].columns[0] 424 else: 425 alias_columns = getattr(unnest_alias, "columns", []) 426 unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0) 427 428 if not isinstance(unnest_identifier, exp.Identifier): 429 return orig_expression 430 431 column_source.update({c.lower(): unnest_identifier for c in unnest_columns}) 432 433 for column in scope.columns: 434 if column.table: 435 continue 436 437 table = column_source.get(column.name.lower()) 438 if ( 439 unnest_identifier 440 and not table 441 and len(scope.sources) == 1 442 and column.name.lower() != unnest_identifier.name.lower() 443 ): 444 table = unnest_identifier 445 446 column.set("table", table and table.copy()) 447 448 return expression 449 450 451def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression: 452 if isinstance(expression, exp.Select): 453 # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations 454 # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced 455 # by Snowflake's parser. 456 # 457 # Additionally, at the time of writing this, BigQuery is the only dialect that produces a 458 # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`. 459 unnest_aliases = set() 460 for unnest in find_all_in_scope(expression, exp.Unnest): 461 unnest_alias = unnest.args.get("alias") 462 if ( 463 isinstance(unnest_alias, exp.TableAlias) 464 and not unnest_alias.this 465 and len(unnest_alias.columns) == 1 466 ): 467 unnest_aliases.add(unnest_alias.columns[0].name) 468 469 if unnest_aliases: 470 for c in find_all_in_scope(expression, exp.Column): 471 if c.table in unnest_aliases: 472 bracket_lhs = c.args["table"] 473 bracket_rhs = exp.Literal.string(c.name) 474 bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs]) 475 476 if c.parent is expression: 477 # Retain column projection names by using aliases 478 c.replace(exp.alias_(bracket, c.this.copy())) 479 else: 480 c.replace(bracket) 481 482 return expression 483 484 485class Snowflake(Dialect): 486 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 487 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 488 NULL_ORDERING = "nulls_are_large" 489 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 490 SUPPORTS_USER_DEFINED_TYPES = False 491 SUPPORTS_SEMI_ANTI_JOIN = False 492 PREFER_CTE_ALIAS_COLUMN = True 493 TABLESAMPLE_SIZE_IS_PERCENT = True 494 COPY_PARAMS_ARE_CSV = False 495 ARRAY_AGG_INCLUDES_NULLS = None 496 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 497 TRY_CAST_REQUIRES_STRING = True 498 499 TIME_MAPPING = { 500 "YYYY": "%Y", 501 "yyyy": "%Y", 502 "YY": "%y", 503 "yy": "%y", 504 "MMMM": "%B", 505 "mmmm": "%B", 506 "MON": "%b", 507 "mon": "%b", 508 "MM": "%m", 509 "mm": "%m", 510 "DD": "%d", 511 "dd": "%-d", 512 "DY": "%a", 513 "dy": "%w", 514 "HH24": "%H", 515 "hh24": "%H", 516 "HH12": "%I", 517 "hh12": "%I", 518 "MI": "%M", 519 "mi": "%M", 520 "SS": "%S", 521 "ss": "%S", 522 "FF6": "%f", 523 "ff6": "%f", 524 } 525 526 DATE_PART_MAPPING = { 527 **Dialect.DATE_PART_MAPPING, 528 "ISOWEEK": "WEEKISO", 529 } 530 531 def quote_identifier(self, expression: E, identify: bool = True) -> E: 532 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 533 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 534 if ( 535 isinstance(expression, exp.Identifier) 536 and isinstance(expression.parent, exp.Table) 537 and expression.name.lower() == "dual" 538 ): 539 return expression # type: ignore 540 541 return super().quote_identifier(expression, identify=identify) 542 543 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 544 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 545 SINGLE_TOKENS.pop("$") 546 547 class Parser(parser.Parser): 548 IDENTIFY_PIVOT_STRINGS = True 549 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 550 COLON_IS_VARIANT_EXTRACT = True 551 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 552 553 ID_VAR_TOKENS = { 554 *parser.Parser.ID_VAR_TOKENS, 555 TokenType.EXCEPT, 556 TokenType.MATCH_CONDITION, 557 } 558 559 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 560 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 561 562 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 563 564 FUNCTIONS = { 565 **parser.Parser.FUNCTIONS, 566 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 567 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 568 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 569 this=seq_get(args, 1), expression=seq_get(args, 0) 570 ), 571 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 572 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 573 start=seq_get(args, 0), 574 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 575 step=seq_get(args, 2), 576 ), 577 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 578 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 579 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 580 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 581 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 582 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 583 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 584 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 585 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 586 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 587 "DATE_TRUNC": _date_trunc_to_time, 588 "DATEADD": _build_date_time_add(exp.DateAdd), 589 "DATEDIFF": _build_datediff, 590 "DIV0": _build_if_from_div0, 591 "EDITDISTANCE": lambda args: exp.Levenshtein( 592 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 593 ), 594 "FLATTEN": exp.Explode.from_arg_list, 595 "GET": exp.GetExtract.from_arg_list, 596 "GET_PATH": lambda args, dialect: exp.JSONExtract( 597 this=seq_get(args, 0), 598 expression=dialect.to_json_path(seq_get(args, 1)), 599 requires_json=True, 600 ), 601 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 602 "IFF": exp.If.from_arg_list, 603 "LAST_DAY": lambda args: exp.LastDay( 604 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 605 ), 606 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 607 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 608 "NULLIFZERO": _build_if_from_nullifzero, 609 "OBJECT_CONSTRUCT": _build_object_construct, 610 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 611 "REGEXP_REPLACE": _build_regexp_replace, 612 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 613 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 614 "REPLACE": build_replace_with_optional_replacement, 615 "RLIKE": exp.RegexpLike.from_arg_list, 616 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 617 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 618 "TIMEADD": _build_date_time_add(exp.TimeAdd), 619 "TIMEDIFF": _build_datediff, 620 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 621 "TIMESTAMPDIFF": _build_datediff, 622 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 623 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 624 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 625 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 626 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 627 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 628 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 629 "TRY_TO_TIMESTAMP": _build_datetime( 630 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 631 ), 632 "TO_CHAR": build_timetostr_or_tochar, 633 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 634 "TO_NUMBER": lambda args: exp.ToNumber( 635 this=seq_get(args, 0), 636 format=seq_get(args, 1), 637 precision=seq_get(args, 2), 638 scale=seq_get(args, 3), 639 ), 640 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 641 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 642 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 643 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 644 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 645 "TO_VARCHAR": exp.ToChar.from_arg_list, 646 "ZEROIFNULL": _build_if_from_zeroifnull, 647 } 648 649 FUNCTION_PARSERS = { 650 **parser.Parser.FUNCTION_PARSERS, 651 "DATE_PART": lambda self: self._parse_date_part(), 652 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 653 "LISTAGG": lambda self: self._parse_string_agg(), 654 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 655 } 656 FUNCTION_PARSERS.pop("TRIM") 657 658 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 659 660 ALTER_PARSERS = { 661 **parser.Parser.ALTER_PARSERS, 662 "UNSET": lambda self: self.expression( 663 exp.Set, 664 tag=self._match_text_seq("TAG"), 665 expressions=self._parse_csv(self._parse_id_var), 666 unset=True, 667 ), 668 } 669 670 STATEMENT_PARSERS = { 671 **parser.Parser.STATEMENT_PARSERS, 672 TokenType.GET: lambda self: self._parse_get(), 673 TokenType.PUT: lambda self: self._parse_put(), 674 TokenType.SHOW: lambda self: self._parse_show(), 675 } 676 677 PROPERTY_PARSERS = { 678 **parser.Parser.PROPERTY_PARSERS, 679 "CREDENTIALS": lambda self: self._parse_credentials_property(), 680 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 681 "LOCATION": lambda self: self._parse_location_property(), 682 "TAG": lambda self: self._parse_tag(), 683 "USING": lambda self: self._match_text_seq("TEMPLATE") 684 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 685 } 686 687 TYPE_CONVERTERS = { 688 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 689 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 690 } 691 692 SHOW_PARSERS = { 693 "DATABASES": _show_parser("DATABASES"), 694 "TERSE DATABASES": _show_parser("DATABASES"), 695 "SCHEMAS": _show_parser("SCHEMAS"), 696 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 697 "OBJECTS": _show_parser("OBJECTS"), 698 "TERSE OBJECTS": _show_parser("OBJECTS"), 699 "TABLES": _show_parser("TABLES"), 700 "TERSE TABLES": _show_parser("TABLES"), 701 "VIEWS": _show_parser("VIEWS"), 702 "TERSE VIEWS": _show_parser("VIEWS"), 703 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 704 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 706 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 708 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "SEQUENCES": _show_parser("SEQUENCES"), 710 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 711 "STAGES": _show_parser("STAGES"), 712 "COLUMNS": _show_parser("COLUMNS"), 713 "USERS": _show_parser("USERS"), 714 "TERSE USERS": _show_parser("USERS"), 715 "FILE FORMATS": _show_parser("FILE FORMATS"), 716 "FUNCTIONS": _show_parser("FUNCTIONS"), 717 "PROCEDURES": _show_parser("PROCEDURES"), 718 "WAREHOUSES": _show_parser("WAREHOUSES"), 719 } 720 721 CONSTRAINT_PARSERS = { 722 **parser.Parser.CONSTRAINT_PARSERS, 723 "WITH": lambda self: self._parse_with_constraint(), 724 "MASKING": lambda self: self._parse_with_constraint(), 725 "PROJECTION": lambda self: self._parse_with_constraint(), 726 "TAG": lambda self: self._parse_with_constraint(), 727 } 728 729 STAGED_FILE_SINGLE_TOKENS = { 730 TokenType.DOT, 731 TokenType.MOD, 732 TokenType.SLASH, 733 } 734 735 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 736 737 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 738 739 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 740 741 LAMBDAS = { 742 **parser.Parser.LAMBDAS, 743 TokenType.ARROW: lambda self, expressions: self.expression( 744 exp.Lambda, 745 this=self._replace_lambda( 746 self._parse_assignment(), 747 expressions, 748 ), 749 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 750 ), 751 } 752 753 def _parse_use(self) -> exp.Use: 754 if self._match_text_seq("SECONDARY", "ROLES"): 755 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 756 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 757 return self.expression( 758 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 759 ) 760 761 return super()._parse_use() 762 763 def _negate_range( 764 self, this: t.Optional[exp.Expression] = None 765 ) -> t.Optional[exp.Expression]: 766 if not this: 767 return this 768 769 query = this.args.get("query") 770 if isinstance(this, exp.In) and isinstance(query, exp.Query): 771 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 772 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 773 # which can produce different results (most likely a SnowFlake bug). 774 # 775 # https://docs.snowflake.com/en/sql-reference/functions/in 776 # Context: https://github.com/tobymao/sqlglot/issues/3890 777 return self.expression( 778 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 779 ) 780 781 return self.expression(exp.Not, this=this) 782 783 def _parse_tag(self) -> exp.Tags: 784 return self.expression( 785 exp.Tags, 786 expressions=self._parse_wrapped_csv(self._parse_property), 787 ) 788 789 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 790 if self._prev.token_type != TokenType.WITH: 791 self._retreat(self._index - 1) 792 793 if self._match_text_seq("MASKING", "POLICY"): 794 policy = self._parse_column() 795 return self.expression( 796 exp.MaskingPolicyColumnConstraint, 797 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 798 expressions=self._match(TokenType.USING) 799 and self._parse_wrapped_csv(self._parse_id_var), 800 ) 801 if self._match_text_seq("PROJECTION", "POLICY"): 802 policy = self._parse_column() 803 return self.expression( 804 exp.ProjectionPolicyColumnConstraint, 805 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 806 ) 807 if self._match(TokenType.TAG): 808 return self._parse_tag() 809 810 return None 811 812 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 813 if self._match(TokenType.TAG): 814 return self._parse_tag() 815 816 return super()._parse_with_property() 817 818 def _parse_create(self) -> exp.Create | exp.Command: 819 expression = super()._parse_create() 820 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 821 # Replace the Table node with the enclosed Identifier 822 expression.this.replace(expression.this.this) 823 824 return expression 825 826 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 827 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 828 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 829 this = self._parse_var() or self._parse_type() 830 831 if not this: 832 return None 833 834 self._match(TokenType.COMMA) 835 expression = self._parse_bitwise() 836 this = map_date_part(this) 837 name = this.name.upper() 838 839 if name.startswith("EPOCH"): 840 if name == "EPOCH_MILLISECOND": 841 scale = 10**3 842 elif name == "EPOCH_MICROSECOND": 843 scale = 10**6 844 elif name == "EPOCH_NANOSECOND": 845 scale = 10**9 846 else: 847 scale = None 848 849 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 850 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 851 852 if scale: 853 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 854 855 return to_unix 856 857 return self.expression(exp.Extract, this=this, expression=expression) 858 859 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 860 if is_map: 861 # Keys are strings in Snowflake's objects, see also: 862 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 863 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 864 return self._parse_slice(self._parse_string()) 865 866 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 867 868 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 869 lateral = super()._parse_lateral() 870 if not lateral: 871 return lateral 872 873 if isinstance(lateral.this, exp.Explode): 874 table_alias = lateral.args.get("alias") 875 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 876 if table_alias and not table_alias.args.get("columns"): 877 table_alias.set("columns", columns) 878 elif not table_alias: 879 exp.alias_(lateral, "_flattened", table=columns, copy=False) 880 881 return lateral 882 883 def _parse_table_parts( 884 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 885 ) -> exp.Table: 886 # https://docs.snowflake.com/en/user-guide/querying-stage 887 if self._match(TokenType.STRING, advance=False): 888 table = self._parse_string() 889 elif self._match_text_seq("@", advance=False): 890 table = self._parse_location_path() 891 else: 892 table = None 893 894 if table: 895 file_format = None 896 pattern = None 897 898 wrapped = self._match(TokenType.L_PAREN) 899 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 900 if self._match_text_seq("FILE_FORMAT", "=>"): 901 file_format = self._parse_string() or super()._parse_table_parts( 902 is_db_reference=is_db_reference 903 ) 904 elif self._match_text_seq("PATTERN", "=>"): 905 pattern = self._parse_string() 906 else: 907 break 908 909 self._match(TokenType.COMMA) 910 911 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 912 else: 913 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 914 915 return table 916 917 def _parse_table( 918 self, 919 schema: bool = False, 920 joins: bool = False, 921 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 922 parse_bracket: bool = False, 923 is_db_reference: bool = False, 924 parse_partition: bool = False, 925 consume_pipe: bool = False, 926 ) -> t.Optional[exp.Expression]: 927 table = super()._parse_table( 928 schema=schema, 929 joins=joins, 930 alias_tokens=alias_tokens, 931 parse_bracket=parse_bracket, 932 is_db_reference=is_db_reference, 933 parse_partition=parse_partition, 934 ) 935 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 936 table_from_rows = table.this 937 for arg in exp.TableFromRows.arg_types: 938 if arg != "this": 939 table_from_rows.set(arg, table.args.get(arg)) 940 941 table = table_from_rows 942 943 return table 944 945 def _parse_id_var( 946 self, 947 any_token: bool = True, 948 tokens: t.Optional[t.Collection[TokenType]] = None, 949 ) -> t.Optional[exp.Expression]: 950 if self._match_text_seq("IDENTIFIER", "("): 951 identifier = ( 952 super()._parse_id_var(any_token=any_token, tokens=tokens) 953 or self._parse_string() 954 ) 955 self._match_r_paren() 956 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 957 958 return super()._parse_id_var(any_token=any_token, tokens=tokens) 959 960 def _parse_show_snowflake(self, this: str) -> exp.Show: 961 scope = None 962 scope_kind = None 963 964 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 965 # which is syntactically valid but has no effect on the output 966 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 967 968 history = self._match_text_seq("HISTORY") 969 970 like = self._parse_string() if self._match(TokenType.LIKE) else None 971 972 if self._match(TokenType.IN): 973 if self._match_text_seq("ACCOUNT"): 974 scope_kind = "ACCOUNT" 975 elif self._match_text_seq("CLASS"): 976 scope_kind = "CLASS" 977 scope = self._parse_table_parts() 978 elif self._match_text_seq("APPLICATION"): 979 scope_kind = "APPLICATION" 980 if self._match_text_seq("PACKAGE"): 981 scope_kind += " PACKAGE" 982 scope = self._parse_table_parts() 983 elif self._match_set(self.DB_CREATABLES): 984 scope_kind = self._prev.text.upper() 985 if self._curr: 986 scope = self._parse_table_parts() 987 elif self._curr: 988 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 989 scope = self._parse_table_parts() 990 991 return self.expression( 992 exp.Show, 993 **{ 994 "terse": terse, 995 "this": this, 996 "history": history, 997 "like": like, 998 "scope": scope, 999 "scope_kind": scope_kind, 1000 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1001 "limit": self._parse_limit(), 1002 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1003 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1004 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1005 }, 1006 ) 1007 1008 def _parse_put(self) -> exp.Put | exp.Command: 1009 if self._curr.token_type != TokenType.STRING: 1010 return self._parse_as_command(self._prev) 1011 1012 return self.expression( 1013 exp.Put, 1014 this=self._parse_string(), 1015 target=self._parse_location_path(), 1016 properties=self._parse_properties(), 1017 ) 1018 1019 def _parse_get(self) -> t.Optional[exp.Expression]: 1020 start = self._prev 1021 1022 # If we detect GET( then we need to parse a function, not a statement 1023 if self._match(TokenType.L_PAREN): 1024 self._retreat(self._index - 2) 1025 return self._parse_expression() 1026 1027 target = self._parse_location_path() 1028 1029 # Parse as command if unquoted file path 1030 if self._curr.token_type == TokenType.URI_START: 1031 return self._parse_as_command(start) 1032 1033 return self.expression( 1034 exp.Get, 1035 this=self._parse_string(), 1036 target=target, 1037 properties=self._parse_properties(), 1038 ) 1039 1040 def _parse_location_property(self) -> exp.LocationProperty: 1041 self._match(TokenType.EQ) 1042 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1043 1044 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1045 # Parse either a subquery or a staged file 1046 return ( 1047 self._parse_select(table=True, parse_subquery_alias=False) 1048 if self._match(TokenType.L_PAREN, advance=False) 1049 else self._parse_table_parts() 1050 ) 1051 1052 def _parse_location_path(self) -> exp.Var: 1053 start = self._curr 1054 self._advance_any(ignore_reserved=True) 1055 1056 # We avoid consuming a comma token because external tables like @foo and @bar 1057 # can be joined in a query with a comma separator, as well as closing paren 1058 # in case of subqueries 1059 while self._is_connected() and not self._match_set( 1060 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1061 ): 1062 self._advance_any(ignore_reserved=True) 1063 1064 return exp.var(self._find_sql(start, self._prev)) 1065 1066 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1067 this = super()._parse_lambda_arg() 1068 1069 if not this: 1070 return this 1071 1072 typ = self._parse_types() 1073 1074 if typ: 1075 return self.expression(exp.Cast, this=this, to=typ) 1076 1077 return this 1078 1079 def _parse_foreign_key(self) -> exp.ForeignKey: 1080 # inlineFK, the REFERENCES columns are implied 1081 if self._match(TokenType.REFERENCES, advance=False): 1082 return self.expression(exp.ForeignKey) 1083 1084 # outoflineFK, explicitly names the columns 1085 return super()._parse_foreign_key() 1086 1087 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1088 self._match(TokenType.EQ) 1089 if self._match(TokenType.L_PAREN, advance=False): 1090 expressions = self._parse_wrapped_options() 1091 else: 1092 expressions = [self._parse_format_name()] 1093 1094 return self.expression( 1095 exp.FileFormatProperty, 1096 expressions=expressions, 1097 ) 1098 1099 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1100 return self.expression( 1101 exp.CredentialsProperty, 1102 expressions=self._parse_wrapped_options(), 1103 ) 1104 1105 def _parse_semantic_view(self) -> exp.SemanticView: 1106 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1107 1108 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1109 if self._match_text_seq("DIMENSIONS"): 1110 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1111 if self._match_text_seq("METRICS"): 1112 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1113 if self._match_text_seq("WHERE"): 1114 kwargs["where"] = self._parse_expression() 1115 1116 return self.expression(exp.SemanticView, **kwargs) 1117 1118 class Tokenizer(tokens.Tokenizer): 1119 STRING_ESCAPES = ["\\", "'"] 1120 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1121 RAW_STRINGS = ["$$"] 1122 COMMENTS = ["--", "//", ("/*", "*/")] 1123 NESTED_COMMENTS = False 1124 1125 KEYWORDS = { 1126 **tokens.Tokenizer.KEYWORDS, 1127 "FILE://": TokenType.URI_START, 1128 "BYTEINT": TokenType.INT, 1129 "EXCLUDE": TokenType.EXCEPT, 1130 "FILE FORMAT": TokenType.FILE_FORMAT, 1131 "GET": TokenType.GET, 1132 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1133 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1134 "MINUS": TokenType.EXCEPT, 1135 "NCHAR VARYING": TokenType.VARCHAR, 1136 "PUT": TokenType.PUT, 1137 "REMOVE": TokenType.COMMAND, 1138 "RM": TokenType.COMMAND, 1139 "SAMPLE": TokenType.TABLE_SAMPLE, 1140 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1141 "SQL_DOUBLE": TokenType.DOUBLE, 1142 "SQL_VARCHAR": TokenType.VARCHAR, 1143 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1144 "TAG": TokenType.TAG, 1145 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1146 "TOP": TokenType.TOP, 1147 "WAREHOUSE": TokenType.WAREHOUSE, 1148 "STAGE": TokenType.STAGE, 1149 "STREAMLIT": TokenType.STREAMLIT, 1150 } 1151 KEYWORDS.pop("/*+") 1152 1153 SINGLE_TOKENS = { 1154 **tokens.Tokenizer.SINGLE_TOKENS, 1155 "$": TokenType.PARAMETER, 1156 } 1157 1158 VAR_SINGLE_TOKENS = {"$"} 1159 1160 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1161 1162 class Generator(generator.Generator): 1163 PARAMETER_TOKEN = "$" 1164 MATCHED_BY_SOURCE = False 1165 SINGLE_STRING_INTERVAL = True 1166 JOIN_HINTS = False 1167 TABLE_HINTS = False 1168 QUERY_HINTS = False 1169 AGGREGATE_FILTER_SUPPORTED = False 1170 SUPPORTS_TABLE_COPY = False 1171 COLLATE_IS_FUNC = True 1172 LIMIT_ONLY_LITERALS = True 1173 JSON_KEY_VALUE_PAIR_SEP = "," 1174 INSERT_OVERWRITE = " OVERWRITE INTO" 1175 STRUCT_DELIMITER = ("(", ")") 1176 COPY_PARAMS_ARE_WRAPPED = False 1177 COPY_PARAMS_EQ_REQUIRED = True 1178 STAR_EXCEPT = "EXCLUDE" 1179 SUPPORTS_EXPLODING_PROJECTIONS = False 1180 ARRAY_CONCAT_IS_VAR_LEN = False 1181 SUPPORTS_CONVERT_TIMEZONE = True 1182 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1183 SUPPORTS_MEDIAN = True 1184 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1185 SUPPORTS_DECODE_CASE = True 1186 IS_BOOL_ALLOWED = False 1187 1188 TRANSFORMS = { 1189 **generator.Generator.TRANSFORMS, 1190 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1191 exp.ArgMax: rename_func("MAX_BY"), 1192 exp.ArgMin: rename_func("MIN_BY"), 1193 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1194 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1195 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1196 exp.AtTimeZone: lambda self, e: self.func( 1197 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1198 ), 1199 exp.BitwiseOr: rename_func("BITOR"), 1200 exp.BitwiseXor: rename_func("BITXOR"), 1201 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1202 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1203 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1204 exp.DateAdd: date_delta_sql("DATEADD"), 1205 exp.DateDiff: date_delta_sql("DATEDIFF"), 1206 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1207 exp.DatetimeDiff: timestampdiff_sql, 1208 exp.DateStrToDate: datestrtodate_sql, 1209 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1210 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1211 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1212 exp.DayOfYear: rename_func("DAYOFYEAR"), 1213 exp.Explode: rename_func("FLATTEN"), 1214 exp.Extract: lambda self, e: self.func( 1215 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1216 ), 1217 exp.FileFormatProperty: lambda self, 1218 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1219 exp.FromTimeZone: lambda self, e: self.func( 1220 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1221 ), 1222 exp.GenerateSeries: lambda self, e: self.func( 1223 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1224 ), 1225 exp.GetExtract: rename_func("GET"), 1226 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1227 exp.If: if_sql(name="IFF", false_value="NULL"), 1228 exp.JSONExtractArray: _json_extract_value_array_sql, 1229 exp.JSONExtractScalar: lambda self, e: self.func( 1230 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1231 ), 1232 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1233 exp.JSONPathRoot: lambda *_: "", 1234 exp.JSONValueArray: _json_extract_value_array_sql, 1235 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1236 rename_func("EDITDISTANCE") 1237 ), 1238 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1239 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1240 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1241 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1242 exp.MakeInterval: no_make_interval_sql, 1243 exp.Max: max_or_greatest, 1244 exp.Min: min_or_least, 1245 exp.ParseJSON: lambda self, e: self.func( 1246 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1247 ), 1248 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1249 exp.PercentileCont: transforms.preprocess( 1250 [transforms.add_within_group_for_percentiles] 1251 ), 1252 exp.PercentileDisc: transforms.preprocess( 1253 [transforms.add_within_group_for_percentiles] 1254 ), 1255 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1256 exp.RegexpExtract: _regexpextract_sql, 1257 exp.RegexpExtractAll: _regexpextract_sql, 1258 exp.RegexpILike: _regexpilike_sql, 1259 exp.Rand: rename_func("RANDOM"), 1260 exp.Select: transforms.preprocess( 1261 [ 1262 transforms.eliminate_window_clause, 1263 transforms.eliminate_distinct_on, 1264 transforms.explode_projection_to_unnest(), 1265 transforms.eliminate_semi_and_anti_joins, 1266 _transform_generate_date_array, 1267 _qualify_unnested_columns, 1268 _eliminate_dot_variant_lookup, 1269 ] 1270 ), 1271 exp.SHA: rename_func("SHA1"), 1272 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1273 exp.StartsWith: rename_func("STARTSWITH"), 1274 exp.EndsWith: rename_func("ENDSWITH"), 1275 exp.StrPosition: lambda self, e: strposition_sql( 1276 self, e, func_name="CHARINDEX", supports_position=True 1277 ), 1278 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1279 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1280 exp.Stuff: rename_func("INSERT"), 1281 exp.StPoint: rename_func("ST_MAKEPOINT"), 1282 exp.TimeAdd: date_delta_sql("TIMEADD"), 1283 exp.Timestamp: no_timestamp_sql, 1284 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1285 exp.TimestampDiff: lambda self, e: self.func( 1286 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1287 ), 1288 exp.TimestampTrunc: timestamptrunc_sql(), 1289 exp.TimeStrToTime: timestrtotime_sql, 1290 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1291 exp.ToArray: rename_func("TO_ARRAY"), 1292 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1293 exp.ToDouble: rename_func("TO_DOUBLE"), 1294 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1295 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1296 exp.TsOrDsToDate: lambda self, e: self.func( 1297 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1298 ), 1299 exp.TsOrDsToTime: lambda self, e: self.func( 1300 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1301 ), 1302 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1303 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1304 exp.Uuid: rename_func("UUID_STRING"), 1305 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1306 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1307 exp.Xor: rename_func("BOOLXOR"), 1308 } 1309 1310 SUPPORTED_JSON_PATH_PARTS = { 1311 exp.JSONPathKey, 1312 exp.JSONPathRoot, 1313 exp.JSONPathSubscript, 1314 } 1315 1316 TYPE_MAPPING = { 1317 **generator.Generator.TYPE_MAPPING, 1318 exp.DataType.Type.NESTED: "OBJECT", 1319 exp.DataType.Type.STRUCT: "OBJECT", 1320 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1321 } 1322 1323 TOKEN_MAPPING = { 1324 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1325 } 1326 1327 PROPERTIES_LOCATION = { 1328 **generator.Generator.PROPERTIES_LOCATION, 1329 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1330 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1331 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1332 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1333 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1334 } 1335 1336 UNSUPPORTED_VALUES_EXPRESSIONS = { 1337 exp.Map, 1338 exp.StarMap, 1339 exp.Struct, 1340 exp.VarMap, 1341 } 1342 1343 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1344 1345 def with_properties(self, properties: exp.Properties) -> str: 1346 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1347 1348 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1349 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1350 values_as_table = False 1351 1352 return super().values_sql(expression, values_as_table=values_as_table) 1353 1354 def datatype_sql(self, expression: exp.DataType) -> str: 1355 expressions = expression.expressions 1356 if ( 1357 expressions 1358 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1359 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1360 ): 1361 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1362 return "OBJECT" 1363 1364 return super().datatype_sql(expression) 1365 1366 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1367 return self.func( 1368 "TO_NUMBER", 1369 expression.this, 1370 expression.args.get("format"), 1371 expression.args.get("precision"), 1372 expression.args.get("scale"), 1373 ) 1374 1375 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1376 milli = expression.args.get("milli") 1377 if milli is not None: 1378 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1379 expression.set("nano", milli_to_nano) 1380 1381 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1382 1383 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1384 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1385 return self.func("TO_GEOGRAPHY", expression.this) 1386 if expression.is_type(exp.DataType.Type.GEOMETRY): 1387 return self.func("TO_GEOMETRY", expression.this) 1388 1389 return super().cast_sql(expression, safe_prefix=safe_prefix) 1390 1391 def trycast_sql(self, expression: exp.TryCast) -> str: 1392 value = expression.this 1393 1394 if value.type is None: 1395 from sqlglot.optimizer.annotate_types import annotate_types 1396 1397 value = annotate_types(value, dialect=self.dialect) 1398 1399 # Snowflake requires that TRY_CAST's value be a string 1400 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1401 # if we can deduce that the value is a string, then we can generate TRY_CAST 1402 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1403 return super().trycast_sql(expression) 1404 1405 return self.cast_sql(expression) 1406 1407 def log_sql(self, expression: exp.Log) -> str: 1408 if not expression.expression: 1409 return self.func("LN", expression.this) 1410 1411 return super().log_sql(expression) 1412 1413 def unnest_sql(self, expression: exp.Unnest) -> str: 1414 unnest_alias = expression.args.get("alias") 1415 offset = expression.args.get("offset") 1416 1417 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1418 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1419 1420 columns = [ 1421 exp.to_identifier("seq"), 1422 exp.to_identifier("key"), 1423 exp.to_identifier("path"), 1424 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1425 value, 1426 exp.to_identifier("this"), 1427 ] 1428 1429 if unnest_alias: 1430 unnest_alias.set("columns", columns) 1431 else: 1432 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1433 1434 table_input = self.sql(expression.expressions[0]) 1435 if not table_input.startswith("INPUT =>"): 1436 table_input = f"INPUT => {table_input}" 1437 1438 expression_parent = expression.parent 1439 1440 explode = ( 1441 f"FLATTEN({table_input})" 1442 if isinstance(expression_parent, exp.Lateral) 1443 else f"TABLE(FLATTEN({table_input}))" 1444 ) 1445 alias = self.sql(unnest_alias) 1446 alias = f" AS {alias}" if alias else "" 1447 value = ( 1448 "" 1449 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1450 else f"{value} FROM " 1451 ) 1452 1453 return f"{value}{explode}{alias}" 1454 1455 def show_sql(self, expression: exp.Show) -> str: 1456 terse = "TERSE " if expression.args.get("terse") else "" 1457 history = " HISTORY" if expression.args.get("history") else "" 1458 like = self.sql(expression, "like") 1459 like = f" LIKE {like}" if like else "" 1460 1461 scope = self.sql(expression, "scope") 1462 scope = f" {scope}" if scope else "" 1463 1464 scope_kind = self.sql(expression, "scope_kind") 1465 if scope_kind: 1466 scope_kind = f" IN {scope_kind}" 1467 1468 starts_with = self.sql(expression, "starts_with") 1469 if starts_with: 1470 starts_with = f" STARTS WITH {starts_with}" 1471 1472 limit = self.sql(expression, "limit") 1473 1474 from_ = self.sql(expression, "from") 1475 if from_: 1476 from_ = f" FROM {from_}" 1477 1478 privileges = self.expressions(expression, key="privileges", flat=True) 1479 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1480 1481 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1482 1483 def describe_sql(self, expression: exp.Describe) -> str: 1484 # Default to table if kind is unknown 1485 kind_value = expression.args.get("kind") or "TABLE" 1486 kind = f" {kind_value}" if kind_value else "" 1487 this = f" {self.sql(expression, 'this')}" 1488 expressions = self.expressions(expression, flat=True) 1489 expressions = f" {expressions}" if expressions else "" 1490 return f"DESCRIBE{kind}{this}{expressions}" 1491 1492 def generatedasidentitycolumnconstraint_sql( 1493 self, expression: exp.GeneratedAsIdentityColumnConstraint 1494 ) -> str: 1495 start = expression.args.get("start") 1496 start = f" START {start}" if start else "" 1497 increment = expression.args.get("increment") 1498 increment = f" INCREMENT {increment}" if increment else "" 1499 1500 order = expression.args.get("order") 1501 if order is not None: 1502 order_clause = " ORDER" if order else " NOORDER" 1503 else: 1504 order_clause = "" 1505 1506 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1507 1508 def cluster_sql(self, expression: exp.Cluster) -> str: 1509 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1510 1511 def struct_sql(self, expression: exp.Struct) -> str: 1512 keys = [] 1513 values = [] 1514 1515 for i, e in enumerate(expression.expressions): 1516 if isinstance(e, exp.PropertyEQ): 1517 keys.append( 1518 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1519 ) 1520 values.append(e.expression) 1521 else: 1522 keys.append(exp.Literal.string(f"_{i}")) 1523 values.append(e) 1524 1525 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1526 1527 @unsupported_args("weight", "accuracy") 1528 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1529 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1530 1531 def alterset_sql(self, expression: exp.AlterSet) -> str: 1532 exprs = self.expressions(expression, flat=True) 1533 exprs = f" {exprs}" if exprs else "" 1534 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1535 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1536 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1537 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1538 tag = self.expressions(expression, key="tag", flat=True) 1539 tag = f" TAG {tag}" if tag else "" 1540 1541 return f"SET{exprs}{file_format}{copy_options}{tag}" 1542 1543 def strtotime_sql(self, expression: exp.StrToTime): 1544 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1545 return self.func( 1546 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1547 ) 1548 1549 def timestampsub_sql(self, expression: exp.TimestampSub): 1550 return self.sql( 1551 exp.TimestampAdd( 1552 this=expression.this, 1553 expression=expression.expression * -1, 1554 unit=expression.unit, 1555 ) 1556 ) 1557 1558 def jsonextract_sql(self, expression: exp.JSONExtract): 1559 this = expression.this 1560 1561 # JSON strings are valid coming from other dialects such as BQ so 1562 # for these cases we PARSE_JSON preemptively 1563 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1564 "requires_json" 1565 ): 1566 this = exp.ParseJSON(this=this) 1567 1568 return self.func( 1569 "GET_PATH", 1570 this, 1571 expression.expression, 1572 ) 1573 1574 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1575 this = expression.this 1576 if this.is_string: 1577 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1578 1579 return self.func("TO_CHAR", this, self.format_time(expression)) 1580 1581 def datesub_sql(self, expression: exp.DateSub) -> str: 1582 value = expression.expression 1583 if value: 1584 value.replace(value * (-1)) 1585 else: 1586 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1587 1588 return date_delta_sql("DATEADD")(self, expression) 1589 1590 def select_sql(self, expression: exp.Select) -> str: 1591 limit = expression.args.get("limit") 1592 offset = expression.args.get("offset") 1593 if offset and not limit: 1594 expression.limit(exp.Null(), copy=False) 1595 return super().select_sql(expression) 1596 1597 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1598 is_materialized = expression.find(exp.MaterializedProperty) 1599 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1600 1601 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1602 # For materialized views, COPY GRANTS is located *before* the columns list 1603 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1604 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1605 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1606 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1607 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1608 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1609 1610 this_name = self.sql(expression.this, "this") 1611 copy_grants = self.sql(copy_grants_property) 1612 this_schema = self.schema_columns_sql(expression.this) 1613 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1614 1615 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1616 1617 return super().createable_sql(expression, locations) 1618 1619 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1620 this = expression.this 1621 1622 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1623 # and add it later as part of the WITHIN GROUP clause 1624 order = this if isinstance(this, exp.Order) else None 1625 if order: 1626 expression.set("this", order.this.pop()) 1627 1628 expr_sql = super().arrayagg_sql(expression) 1629 1630 if order: 1631 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1632 1633 return expr_sql 1634 1635 def array_sql(self, expression: exp.Array) -> str: 1636 expressions = expression.expressions 1637 1638 first_expr = seq_get(expressions, 0) 1639 if isinstance(first_expr, exp.Select): 1640 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1641 if first_expr.text("kind").upper() == "STRUCT": 1642 object_construct_args = [] 1643 for expr in first_expr.expressions: 1644 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1645 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1646 name = expr.this if isinstance(expr, exp.Alias) else expr 1647 1648 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1649 1650 array_agg = exp.ArrayAgg( 1651 this=_build_object_construct(args=object_construct_args) 1652 ) 1653 1654 first_expr.set("kind", None) 1655 first_expr.set("expressions", [array_agg]) 1656 1657 return self.sql(first_expr.subquery()) 1658 1659 return inline_array_sql(self, expression) 1660 1661 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1662 zone = self.sql(expression, "this") 1663 if not zone: 1664 return super().currentdate_sql(expression) 1665 1666 expr = exp.Cast( 1667 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1668 to=exp.DataType(this=exp.DataType.Type.DATE), 1669 ) 1670 return self.sql(expr) 1671 1672 def dot_sql(self, expression: exp.Dot) -> str: 1673 this = expression.this 1674 1675 if not this.type: 1676 from sqlglot.optimizer.annotate_types import annotate_types 1677 1678 this = annotate_types(this, dialect=self.dialect) 1679 1680 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1681 # Generate colon notation for the top level STRUCT 1682 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1683 1684 return super().dot_sql(expression)
486class Snowflake(Dialect): 487 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 488 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 489 NULL_ORDERING = "nulls_are_large" 490 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 491 SUPPORTS_USER_DEFINED_TYPES = False 492 SUPPORTS_SEMI_ANTI_JOIN = False 493 PREFER_CTE_ALIAS_COLUMN = True 494 TABLESAMPLE_SIZE_IS_PERCENT = True 495 COPY_PARAMS_ARE_CSV = False 496 ARRAY_AGG_INCLUDES_NULLS = None 497 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 498 TRY_CAST_REQUIRES_STRING = True 499 500 TIME_MAPPING = { 501 "YYYY": "%Y", 502 "yyyy": "%Y", 503 "YY": "%y", 504 "yy": "%y", 505 "MMMM": "%B", 506 "mmmm": "%B", 507 "MON": "%b", 508 "mon": "%b", 509 "MM": "%m", 510 "mm": "%m", 511 "DD": "%d", 512 "dd": "%-d", 513 "DY": "%a", 514 "dy": "%w", 515 "HH24": "%H", 516 "hh24": "%H", 517 "HH12": "%I", 518 "hh12": "%I", 519 "MI": "%M", 520 "mi": "%M", 521 "SS": "%S", 522 "ss": "%S", 523 "FF6": "%f", 524 "ff6": "%f", 525 } 526 527 DATE_PART_MAPPING = { 528 **Dialect.DATE_PART_MAPPING, 529 "ISOWEEK": "WEEKISO", 530 } 531 532 def quote_identifier(self, expression: E, identify: bool = True) -> E: 533 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 534 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 535 if ( 536 isinstance(expression, exp.Identifier) 537 and isinstance(expression.parent, exp.Table) 538 and expression.name.lower() == "dual" 539 ): 540 return expression # type: ignore 541 542 return super().quote_identifier(expression, identify=identify) 543 544 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 545 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 546 SINGLE_TOKENS.pop("$") 547 548 class Parser(parser.Parser): 549 IDENTIFY_PIVOT_STRINGS = True 550 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 551 COLON_IS_VARIANT_EXTRACT = True 552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 553 554 ID_VAR_TOKENS = { 555 *parser.Parser.ID_VAR_TOKENS, 556 TokenType.EXCEPT, 557 TokenType.MATCH_CONDITION, 558 } 559 560 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 561 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 562 563 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 564 565 FUNCTIONS = { 566 **parser.Parser.FUNCTIONS, 567 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 568 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 569 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 570 this=seq_get(args, 1), expression=seq_get(args, 0) 571 ), 572 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 573 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 574 start=seq_get(args, 0), 575 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 576 step=seq_get(args, 2), 577 ), 578 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 579 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 580 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 581 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 582 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 583 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 584 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 585 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 586 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 587 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 588 "DATE_TRUNC": _date_trunc_to_time, 589 "DATEADD": _build_date_time_add(exp.DateAdd), 590 "DATEDIFF": _build_datediff, 591 "DIV0": _build_if_from_div0, 592 "EDITDISTANCE": lambda args: exp.Levenshtein( 593 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 594 ), 595 "FLATTEN": exp.Explode.from_arg_list, 596 "GET": exp.GetExtract.from_arg_list, 597 "GET_PATH": lambda args, dialect: exp.JSONExtract( 598 this=seq_get(args, 0), 599 expression=dialect.to_json_path(seq_get(args, 1)), 600 requires_json=True, 601 ), 602 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 603 "IFF": exp.If.from_arg_list, 604 "LAST_DAY": lambda args: exp.LastDay( 605 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 606 ), 607 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 608 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 609 "NULLIFZERO": _build_if_from_nullifzero, 610 "OBJECT_CONSTRUCT": _build_object_construct, 611 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 612 "REGEXP_REPLACE": _build_regexp_replace, 613 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 614 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 615 "REPLACE": build_replace_with_optional_replacement, 616 "RLIKE": exp.RegexpLike.from_arg_list, 617 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 618 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 619 "TIMEADD": _build_date_time_add(exp.TimeAdd), 620 "TIMEDIFF": _build_datediff, 621 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 622 "TIMESTAMPDIFF": _build_datediff, 623 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 624 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 625 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 626 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 627 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 628 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 629 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 630 "TRY_TO_TIMESTAMP": _build_datetime( 631 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 632 ), 633 "TO_CHAR": build_timetostr_or_tochar, 634 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 635 "TO_NUMBER": lambda args: exp.ToNumber( 636 this=seq_get(args, 0), 637 format=seq_get(args, 1), 638 precision=seq_get(args, 2), 639 scale=seq_get(args, 3), 640 ), 641 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 642 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 643 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 644 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 645 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 646 "TO_VARCHAR": exp.ToChar.from_arg_list, 647 "ZEROIFNULL": _build_if_from_zeroifnull, 648 } 649 650 FUNCTION_PARSERS = { 651 **parser.Parser.FUNCTION_PARSERS, 652 "DATE_PART": lambda self: self._parse_date_part(), 653 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 654 "LISTAGG": lambda self: self._parse_string_agg(), 655 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 656 } 657 FUNCTION_PARSERS.pop("TRIM") 658 659 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 660 661 ALTER_PARSERS = { 662 **parser.Parser.ALTER_PARSERS, 663 "UNSET": lambda self: self.expression( 664 exp.Set, 665 tag=self._match_text_seq("TAG"), 666 expressions=self._parse_csv(self._parse_id_var), 667 unset=True, 668 ), 669 } 670 671 STATEMENT_PARSERS = { 672 **parser.Parser.STATEMENT_PARSERS, 673 TokenType.GET: lambda self: self._parse_get(), 674 TokenType.PUT: lambda self: self._parse_put(), 675 TokenType.SHOW: lambda self: self._parse_show(), 676 } 677 678 PROPERTY_PARSERS = { 679 **parser.Parser.PROPERTY_PARSERS, 680 "CREDENTIALS": lambda self: self._parse_credentials_property(), 681 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 682 "LOCATION": lambda self: self._parse_location_property(), 683 "TAG": lambda self: self._parse_tag(), 684 "USING": lambda self: self._match_text_seq("TEMPLATE") 685 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 686 } 687 688 TYPE_CONVERTERS = { 689 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 690 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 691 } 692 693 SHOW_PARSERS = { 694 "DATABASES": _show_parser("DATABASES"), 695 "TERSE DATABASES": _show_parser("DATABASES"), 696 "SCHEMAS": _show_parser("SCHEMAS"), 697 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 698 "OBJECTS": _show_parser("OBJECTS"), 699 "TERSE OBJECTS": _show_parser("OBJECTS"), 700 "TABLES": _show_parser("TABLES"), 701 "TERSE TABLES": _show_parser("TABLES"), 702 "VIEWS": _show_parser("VIEWS"), 703 "TERSE VIEWS": _show_parser("VIEWS"), 704 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 706 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 708 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 710 "SEQUENCES": _show_parser("SEQUENCES"), 711 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 712 "STAGES": _show_parser("STAGES"), 713 "COLUMNS": _show_parser("COLUMNS"), 714 "USERS": _show_parser("USERS"), 715 "TERSE USERS": _show_parser("USERS"), 716 "FILE FORMATS": _show_parser("FILE FORMATS"), 717 "FUNCTIONS": _show_parser("FUNCTIONS"), 718 "PROCEDURES": _show_parser("PROCEDURES"), 719 "WAREHOUSES": _show_parser("WAREHOUSES"), 720 } 721 722 CONSTRAINT_PARSERS = { 723 **parser.Parser.CONSTRAINT_PARSERS, 724 "WITH": lambda self: self._parse_with_constraint(), 725 "MASKING": lambda self: self._parse_with_constraint(), 726 "PROJECTION": lambda self: self._parse_with_constraint(), 727 "TAG": lambda self: self._parse_with_constraint(), 728 } 729 730 STAGED_FILE_SINGLE_TOKENS = { 731 TokenType.DOT, 732 TokenType.MOD, 733 TokenType.SLASH, 734 } 735 736 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 737 738 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 739 740 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 741 742 LAMBDAS = { 743 **parser.Parser.LAMBDAS, 744 TokenType.ARROW: lambda self, expressions: self.expression( 745 exp.Lambda, 746 this=self._replace_lambda( 747 self._parse_assignment(), 748 expressions, 749 ), 750 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 751 ), 752 } 753 754 def _parse_use(self) -> exp.Use: 755 if self._match_text_seq("SECONDARY", "ROLES"): 756 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 757 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 758 return self.expression( 759 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 760 ) 761 762 return super()._parse_use() 763 764 def _negate_range( 765 self, this: t.Optional[exp.Expression] = None 766 ) -> t.Optional[exp.Expression]: 767 if not this: 768 return this 769 770 query = this.args.get("query") 771 if isinstance(this, exp.In) and isinstance(query, exp.Query): 772 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 773 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 774 # which can produce different results (most likely a SnowFlake bug). 775 # 776 # https://docs.snowflake.com/en/sql-reference/functions/in 777 # Context: https://github.com/tobymao/sqlglot/issues/3890 778 return self.expression( 779 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 780 ) 781 782 return self.expression(exp.Not, this=this) 783 784 def _parse_tag(self) -> exp.Tags: 785 return self.expression( 786 exp.Tags, 787 expressions=self._parse_wrapped_csv(self._parse_property), 788 ) 789 790 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 791 if self._prev.token_type != TokenType.WITH: 792 self._retreat(self._index - 1) 793 794 if self._match_text_seq("MASKING", "POLICY"): 795 policy = self._parse_column() 796 return self.expression( 797 exp.MaskingPolicyColumnConstraint, 798 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 799 expressions=self._match(TokenType.USING) 800 and self._parse_wrapped_csv(self._parse_id_var), 801 ) 802 if self._match_text_seq("PROJECTION", "POLICY"): 803 policy = self._parse_column() 804 return self.expression( 805 exp.ProjectionPolicyColumnConstraint, 806 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 807 ) 808 if self._match(TokenType.TAG): 809 return self._parse_tag() 810 811 return None 812 813 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 814 if self._match(TokenType.TAG): 815 return self._parse_tag() 816 817 return super()._parse_with_property() 818 819 def _parse_create(self) -> exp.Create | exp.Command: 820 expression = super()._parse_create() 821 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 822 # Replace the Table node with the enclosed Identifier 823 expression.this.replace(expression.this.this) 824 825 return expression 826 827 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 828 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 829 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 830 this = self._parse_var() or self._parse_type() 831 832 if not this: 833 return None 834 835 self._match(TokenType.COMMA) 836 expression = self._parse_bitwise() 837 this = map_date_part(this) 838 name = this.name.upper() 839 840 if name.startswith("EPOCH"): 841 if name == "EPOCH_MILLISECOND": 842 scale = 10**3 843 elif name == "EPOCH_MICROSECOND": 844 scale = 10**6 845 elif name == "EPOCH_NANOSECOND": 846 scale = 10**9 847 else: 848 scale = None 849 850 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 851 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 852 853 if scale: 854 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 855 856 return to_unix 857 858 return self.expression(exp.Extract, this=this, expression=expression) 859 860 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 861 if is_map: 862 # Keys are strings in Snowflake's objects, see also: 863 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 864 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 865 return self._parse_slice(self._parse_string()) 866 867 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 868 869 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 870 lateral = super()._parse_lateral() 871 if not lateral: 872 return lateral 873 874 if isinstance(lateral.this, exp.Explode): 875 table_alias = lateral.args.get("alias") 876 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 877 if table_alias and not table_alias.args.get("columns"): 878 table_alias.set("columns", columns) 879 elif not table_alias: 880 exp.alias_(lateral, "_flattened", table=columns, copy=False) 881 882 return lateral 883 884 def _parse_table_parts( 885 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 886 ) -> exp.Table: 887 # https://docs.snowflake.com/en/user-guide/querying-stage 888 if self._match(TokenType.STRING, advance=False): 889 table = self._parse_string() 890 elif self._match_text_seq("@", advance=False): 891 table = self._parse_location_path() 892 else: 893 table = None 894 895 if table: 896 file_format = None 897 pattern = None 898 899 wrapped = self._match(TokenType.L_PAREN) 900 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 901 if self._match_text_seq("FILE_FORMAT", "=>"): 902 file_format = self._parse_string() or super()._parse_table_parts( 903 is_db_reference=is_db_reference 904 ) 905 elif self._match_text_seq("PATTERN", "=>"): 906 pattern = self._parse_string() 907 else: 908 break 909 910 self._match(TokenType.COMMA) 911 912 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 913 else: 914 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 915 916 return table 917 918 def _parse_table( 919 self, 920 schema: bool = False, 921 joins: bool = False, 922 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 923 parse_bracket: bool = False, 924 is_db_reference: bool = False, 925 parse_partition: bool = False, 926 consume_pipe: bool = False, 927 ) -> t.Optional[exp.Expression]: 928 table = super()._parse_table( 929 schema=schema, 930 joins=joins, 931 alias_tokens=alias_tokens, 932 parse_bracket=parse_bracket, 933 is_db_reference=is_db_reference, 934 parse_partition=parse_partition, 935 ) 936 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 937 table_from_rows = table.this 938 for arg in exp.TableFromRows.arg_types: 939 if arg != "this": 940 table_from_rows.set(arg, table.args.get(arg)) 941 942 table = table_from_rows 943 944 return table 945 946 def _parse_id_var( 947 self, 948 any_token: bool = True, 949 tokens: t.Optional[t.Collection[TokenType]] = None, 950 ) -> t.Optional[exp.Expression]: 951 if self._match_text_seq("IDENTIFIER", "("): 952 identifier = ( 953 super()._parse_id_var(any_token=any_token, tokens=tokens) 954 or self._parse_string() 955 ) 956 self._match_r_paren() 957 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 958 959 return super()._parse_id_var(any_token=any_token, tokens=tokens) 960 961 def _parse_show_snowflake(self, this: str) -> exp.Show: 962 scope = None 963 scope_kind = None 964 965 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 966 # which is syntactically valid but has no effect on the output 967 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 968 969 history = self._match_text_seq("HISTORY") 970 971 like = self._parse_string() if self._match(TokenType.LIKE) else None 972 973 if self._match(TokenType.IN): 974 if self._match_text_seq("ACCOUNT"): 975 scope_kind = "ACCOUNT" 976 elif self._match_text_seq("CLASS"): 977 scope_kind = "CLASS" 978 scope = self._parse_table_parts() 979 elif self._match_text_seq("APPLICATION"): 980 scope_kind = "APPLICATION" 981 if self._match_text_seq("PACKAGE"): 982 scope_kind += " PACKAGE" 983 scope = self._parse_table_parts() 984 elif self._match_set(self.DB_CREATABLES): 985 scope_kind = self._prev.text.upper() 986 if self._curr: 987 scope = self._parse_table_parts() 988 elif self._curr: 989 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 990 scope = self._parse_table_parts() 991 992 return self.expression( 993 exp.Show, 994 **{ 995 "terse": terse, 996 "this": this, 997 "history": history, 998 "like": like, 999 "scope": scope, 1000 "scope_kind": scope_kind, 1001 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1002 "limit": self._parse_limit(), 1003 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1004 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1005 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1006 }, 1007 ) 1008 1009 def _parse_put(self) -> exp.Put | exp.Command: 1010 if self._curr.token_type != TokenType.STRING: 1011 return self._parse_as_command(self._prev) 1012 1013 return self.expression( 1014 exp.Put, 1015 this=self._parse_string(), 1016 target=self._parse_location_path(), 1017 properties=self._parse_properties(), 1018 ) 1019 1020 def _parse_get(self) -> t.Optional[exp.Expression]: 1021 start = self._prev 1022 1023 # If we detect GET( then we need to parse a function, not a statement 1024 if self._match(TokenType.L_PAREN): 1025 self._retreat(self._index - 2) 1026 return self._parse_expression() 1027 1028 target = self._parse_location_path() 1029 1030 # Parse as command if unquoted file path 1031 if self._curr.token_type == TokenType.URI_START: 1032 return self._parse_as_command(start) 1033 1034 return self.expression( 1035 exp.Get, 1036 this=self._parse_string(), 1037 target=target, 1038 properties=self._parse_properties(), 1039 ) 1040 1041 def _parse_location_property(self) -> exp.LocationProperty: 1042 self._match(TokenType.EQ) 1043 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1044 1045 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1046 # Parse either a subquery or a staged file 1047 return ( 1048 self._parse_select(table=True, parse_subquery_alias=False) 1049 if self._match(TokenType.L_PAREN, advance=False) 1050 else self._parse_table_parts() 1051 ) 1052 1053 def _parse_location_path(self) -> exp.Var: 1054 start = self._curr 1055 self._advance_any(ignore_reserved=True) 1056 1057 # We avoid consuming a comma token because external tables like @foo and @bar 1058 # can be joined in a query with a comma separator, as well as closing paren 1059 # in case of subqueries 1060 while self._is_connected() and not self._match_set( 1061 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1062 ): 1063 self._advance_any(ignore_reserved=True) 1064 1065 return exp.var(self._find_sql(start, self._prev)) 1066 1067 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1068 this = super()._parse_lambda_arg() 1069 1070 if not this: 1071 return this 1072 1073 typ = self._parse_types() 1074 1075 if typ: 1076 return self.expression(exp.Cast, this=this, to=typ) 1077 1078 return this 1079 1080 def _parse_foreign_key(self) -> exp.ForeignKey: 1081 # inlineFK, the REFERENCES columns are implied 1082 if self._match(TokenType.REFERENCES, advance=False): 1083 return self.expression(exp.ForeignKey) 1084 1085 # outoflineFK, explicitly names the columns 1086 return super()._parse_foreign_key() 1087 1088 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1089 self._match(TokenType.EQ) 1090 if self._match(TokenType.L_PAREN, advance=False): 1091 expressions = self._parse_wrapped_options() 1092 else: 1093 expressions = [self._parse_format_name()] 1094 1095 return self.expression( 1096 exp.FileFormatProperty, 1097 expressions=expressions, 1098 ) 1099 1100 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1101 return self.expression( 1102 exp.CredentialsProperty, 1103 expressions=self._parse_wrapped_options(), 1104 ) 1105 1106 def _parse_semantic_view(self) -> exp.SemanticView: 1107 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1108 1109 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1110 if self._match_text_seq("DIMENSIONS"): 1111 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1112 if self._match_text_seq("METRICS"): 1113 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1114 if self._match_text_seq("WHERE"): 1115 kwargs["where"] = self._parse_expression() 1116 1117 return self.expression(exp.SemanticView, **kwargs) 1118 1119 class Tokenizer(tokens.Tokenizer): 1120 STRING_ESCAPES = ["\\", "'"] 1121 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1122 RAW_STRINGS = ["$$"] 1123 COMMENTS = ["--", "//", ("/*", "*/")] 1124 NESTED_COMMENTS = False 1125 1126 KEYWORDS = { 1127 **tokens.Tokenizer.KEYWORDS, 1128 "FILE://": TokenType.URI_START, 1129 "BYTEINT": TokenType.INT, 1130 "EXCLUDE": TokenType.EXCEPT, 1131 "FILE FORMAT": TokenType.FILE_FORMAT, 1132 "GET": TokenType.GET, 1133 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1134 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1135 "MINUS": TokenType.EXCEPT, 1136 "NCHAR VARYING": TokenType.VARCHAR, 1137 "PUT": TokenType.PUT, 1138 "REMOVE": TokenType.COMMAND, 1139 "RM": TokenType.COMMAND, 1140 "SAMPLE": TokenType.TABLE_SAMPLE, 1141 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1142 "SQL_DOUBLE": TokenType.DOUBLE, 1143 "SQL_VARCHAR": TokenType.VARCHAR, 1144 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1145 "TAG": TokenType.TAG, 1146 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1147 "TOP": TokenType.TOP, 1148 "WAREHOUSE": TokenType.WAREHOUSE, 1149 "STAGE": TokenType.STAGE, 1150 "STREAMLIT": TokenType.STREAMLIT, 1151 } 1152 KEYWORDS.pop("/*+") 1153 1154 SINGLE_TOKENS = { 1155 **tokens.Tokenizer.SINGLE_TOKENS, 1156 "$": TokenType.PARAMETER, 1157 } 1158 1159 VAR_SINGLE_TOKENS = {"$"} 1160 1161 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1162 1163 class Generator(generator.Generator): 1164 PARAMETER_TOKEN = "$" 1165 MATCHED_BY_SOURCE = False 1166 SINGLE_STRING_INTERVAL = True 1167 JOIN_HINTS = False 1168 TABLE_HINTS = False 1169 QUERY_HINTS = False 1170 AGGREGATE_FILTER_SUPPORTED = False 1171 SUPPORTS_TABLE_COPY = False 1172 COLLATE_IS_FUNC = True 1173 LIMIT_ONLY_LITERALS = True 1174 JSON_KEY_VALUE_PAIR_SEP = "," 1175 INSERT_OVERWRITE = " OVERWRITE INTO" 1176 STRUCT_DELIMITER = ("(", ")") 1177 COPY_PARAMS_ARE_WRAPPED = False 1178 COPY_PARAMS_EQ_REQUIRED = True 1179 STAR_EXCEPT = "EXCLUDE" 1180 SUPPORTS_EXPLODING_PROJECTIONS = False 1181 ARRAY_CONCAT_IS_VAR_LEN = False 1182 SUPPORTS_CONVERT_TIMEZONE = True 1183 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1184 SUPPORTS_MEDIAN = True 1185 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1186 SUPPORTS_DECODE_CASE = True 1187 IS_BOOL_ALLOWED = False 1188 1189 TRANSFORMS = { 1190 **generator.Generator.TRANSFORMS, 1191 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1192 exp.ArgMax: rename_func("MAX_BY"), 1193 exp.ArgMin: rename_func("MIN_BY"), 1194 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1195 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1196 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1197 exp.AtTimeZone: lambda self, e: self.func( 1198 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1199 ), 1200 exp.BitwiseOr: rename_func("BITOR"), 1201 exp.BitwiseXor: rename_func("BITXOR"), 1202 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1203 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1204 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1205 exp.DateAdd: date_delta_sql("DATEADD"), 1206 exp.DateDiff: date_delta_sql("DATEDIFF"), 1207 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1208 exp.DatetimeDiff: timestampdiff_sql, 1209 exp.DateStrToDate: datestrtodate_sql, 1210 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1211 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1212 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1213 exp.DayOfYear: rename_func("DAYOFYEAR"), 1214 exp.Explode: rename_func("FLATTEN"), 1215 exp.Extract: lambda self, e: self.func( 1216 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1217 ), 1218 exp.FileFormatProperty: lambda self, 1219 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1220 exp.FromTimeZone: lambda self, e: self.func( 1221 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1222 ), 1223 exp.GenerateSeries: lambda self, e: self.func( 1224 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1225 ), 1226 exp.GetExtract: rename_func("GET"), 1227 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1228 exp.If: if_sql(name="IFF", false_value="NULL"), 1229 exp.JSONExtractArray: _json_extract_value_array_sql, 1230 exp.JSONExtractScalar: lambda self, e: self.func( 1231 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1232 ), 1233 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1234 exp.JSONPathRoot: lambda *_: "", 1235 exp.JSONValueArray: _json_extract_value_array_sql, 1236 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1237 rename_func("EDITDISTANCE") 1238 ), 1239 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1240 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1241 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1242 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1243 exp.MakeInterval: no_make_interval_sql, 1244 exp.Max: max_or_greatest, 1245 exp.Min: min_or_least, 1246 exp.ParseJSON: lambda self, e: self.func( 1247 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1248 ), 1249 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1250 exp.PercentileCont: transforms.preprocess( 1251 [transforms.add_within_group_for_percentiles] 1252 ), 1253 exp.PercentileDisc: transforms.preprocess( 1254 [transforms.add_within_group_for_percentiles] 1255 ), 1256 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1257 exp.RegexpExtract: _regexpextract_sql, 1258 exp.RegexpExtractAll: _regexpextract_sql, 1259 exp.RegexpILike: _regexpilike_sql, 1260 exp.Rand: rename_func("RANDOM"), 1261 exp.Select: transforms.preprocess( 1262 [ 1263 transforms.eliminate_window_clause, 1264 transforms.eliminate_distinct_on, 1265 transforms.explode_projection_to_unnest(), 1266 transforms.eliminate_semi_and_anti_joins, 1267 _transform_generate_date_array, 1268 _qualify_unnested_columns, 1269 _eliminate_dot_variant_lookup, 1270 ] 1271 ), 1272 exp.SHA: rename_func("SHA1"), 1273 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1274 exp.StartsWith: rename_func("STARTSWITH"), 1275 exp.EndsWith: rename_func("ENDSWITH"), 1276 exp.StrPosition: lambda self, e: strposition_sql( 1277 self, e, func_name="CHARINDEX", supports_position=True 1278 ), 1279 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1280 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1281 exp.Stuff: rename_func("INSERT"), 1282 exp.StPoint: rename_func("ST_MAKEPOINT"), 1283 exp.TimeAdd: date_delta_sql("TIMEADD"), 1284 exp.Timestamp: no_timestamp_sql, 1285 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1286 exp.TimestampDiff: lambda self, e: self.func( 1287 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1288 ), 1289 exp.TimestampTrunc: timestamptrunc_sql(), 1290 exp.TimeStrToTime: timestrtotime_sql, 1291 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1292 exp.ToArray: rename_func("TO_ARRAY"), 1293 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1294 exp.ToDouble: rename_func("TO_DOUBLE"), 1295 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1296 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1297 exp.TsOrDsToDate: lambda self, e: self.func( 1298 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1299 ), 1300 exp.TsOrDsToTime: lambda self, e: self.func( 1301 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1302 ), 1303 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1304 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1305 exp.Uuid: rename_func("UUID_STRING"), 1306 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1307 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1308 exp.Xor: rename_func("BOOLXOR"), 1309 } 1310 1311 SUPPORTED_JSON_PATH_PARTS = { 1312 exp.JSONPathKey, 1313 exp.JSONPathRoot, 1314 exp.JSONPathSubscript, 1315 } 1316 1317 TYPE_MAPPING = { 1318 **generator.Generator.TYPE_MAPPING, 1319 exp.DataType.Type.NESTED: "OBJECT", 1320 exp.DataType.Type.STRUCT: "OBJECT", 1321 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1322 } 1323 1324 TOKEN_MAPPING = { 1325 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1326 } 1327 1328 PROPERTIES_LOCATION = { 1329 **generator.Generator.PROPERTIES_LOCATION, 1330 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1331 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1332 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1333 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1334 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1335 } 1336 1337 UNSUPPORTED_VALUES_EXPRESSIONS = { 1338 exp.Map, 1339 exp.StarMap, 1340 exp.Struct, 1341 exp.VarMap, 1342 } 1343 1344 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1345 1346 def with_properties(self, properties: exp.Properties) -> str: 1347 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1348 1349 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1350 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1351 values_as_table = False 1352 1353 return super().values_sql(expression, values_as_table=values_as_table) 1354 1355 def datatype_sql(self, expression: exp.DataType) -> str: 1356 expressions = expression.expressions 1357 if ( 1358 expressions 1359 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1360 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1361 ): 1362 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1363 return "OBJECT" 1364 1365 return super().datatype_sql(expression) 1366 1367 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1368 return self.func( 1369 "TO_NUMBER", 1370 expression.this, 1371 expression.args.get("format"), 1372 expression.args.get("precision"), 1373 expression.args.get("scale"), 1374 ) 1375 1376 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1377 milli = expression.args.get("milli") 1378 if milli is not None: 1379 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1380 expression.set("nano", milli_to_nano) 1381 1382 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1383 1384 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1385 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1386 return self.func("TO_GEOGRAPHY", expression.this) 1387 if expression.is_type(exp.DataType.Type.GEOMETRY): 1388 return self.func("TO_GEOMETRY", expression.this) 1389 1390 return super().cast_sql(expression, safe_prefix=safe_prefix) 1391 1392 def trycast_sql(self, expression: exp.TryCast) -> str: 1393 value = expression.this 1394 1395 if value.type is None: 1396 from sqlglot.optimizer.annotate_types import annotate_types 1397 1398 value = annotate_types(value, dialect=self.dialect) 1399 1400 # Snowflake requires that TRY_CAST's value be a string 1401 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1402 # if we can deduce that the value is a string, then we can generate TRY_CAST 1403 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1404 return super().trycast_sql(expression) 1405 1406 return self.cast_sql(expression) 1407 1408 def log_sql(self, expression: exp.Log) -> str: 1409 if not expression.expression: 1410 return self.func("LN", expression.this) 1411 1412 return super().log_sql(expression) 1413 1414 def unnest_sql(self, expression: exp.Unnest) -> str: 1415 unnest_alias = expression.args.get("alias") 1416 offset = expression.args.get("offset") 1417 1418 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1419 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1420 1421 columns = [ 1422 exp.to_identifier("seq"), 1423 exp.to_identifier("key"), 1424 exp.to_identifier("path"), 1425 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1426 value, 1427 exp.to_identifier("this"), 1428 ] 1429 1430 if unnest_alias: 1431 unnest_alias.set("columns", columns) 1432 else: 1433 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1434 1435 table_input = self.sql(expression.expressions[0]) 1436 if not table_input.startswith("INPUT =>"): 1437 table_input = f"INPUT => {table_input}" 1438 1439 expression_parent = expression.parent 1440 1441 explode = ( 1442 f"FLATTEN({table_input})" 1443 if isinstance(expression_parent, exp.Lateral) 1444 else f"TABLE(FLATTEN({table_input}))" 1445 ) 1446 alias = self.sql(unnest_alias) 1447 alias = f" AS {alias}" if alias else "" 1448 value = ( 1449 "" 1450 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1451 else f"{value} FROM " 1452 ) 1453 1454 return f"{value}{explode}{alias}" 1455 1456 def show_sql(self, expression: exp.Show) -> str: 1457 terse = "TERSE " if expression.args.get("terse") else "" 1458 history = " HISTORY" if expression.args.get("history") else "" 1459 like = self.sql(expression, "like") 1460 like = f" LIKE {like}" if like else "" 1461 1462 scope = self.sql(expression, "scope") 1463 scope = f" {scope}" if scope else "" 1464 1465 scope_kind = self.sql(expression, "scope_kind") 1466 if scope_kind: 1467 scope_kind = f" IN {scope_kind}" 1468 1469 starts_with = self.sql(expression, "starts_with") 1470 if starts_with: 1471 starts_with = f" STARTS WITH {starts_with}" 1472 1473 limit = self.sql(expression, "limit") 1474 1475 from_ = self.sql(expression, "from") 1476 if from_: 1477 from_ = f" FROM {from_}" 1478 1479 privileges = self.expressions(expression, key="privileges", flat=True) 1480 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1481 1482 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1483 1484 def describe_sql(self, expression: exp.Describe) -> str: 1485 # Default to table if kind is unknown 1486 kind_value = expression.args.get("kind") or "TABLE" 1487 kind = f" {kind_value}" if kind_value else "" 1488 this = f" {self.sql(expression, 'this')}" 1489 expressions = self.expressions(expression, flat=True) 1490 expressions = f" {expressions}" if expressions else "" 1491 return f"DESCRIBE{kind}{this}{expressions}" 1492 1493 def generatedasidentitycolumnconstraint_sql( 1494 self, expression: exp.GeneratedAsIdentityColumnConstraint 1495 ) -> str: 1496 start = expression.args.get("start") 1497 start = f" START {start}" if start else "" 1498 increment = expression.args.get("increment") 1499 increment = f" INCREMENT {increment}" if increment else "" 1500 1501 order = expression.args.get("order") 1502 if order is not None: 1503 order_clause = " ORDER" if order else " NOORDER" 1504 else: 1505 order_clause = "" 1506 1507 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1508 1509 def cluster_sql(self, expression: exp.Cluster) -> str: 1510 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1511 1512 def struct_sql(self, expression: exp.Struct) -> str: 1513 keys = [] 1514 values = [] 1515 1516 for i, e in enumerate(expression.expressions): 1517 if isinstance(e, exp.PropertyEQ): 1518 keys.append( 1519 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1520 ) 1521 values.append(e.expression) 1522 else: 1523 keys.append(exp.Literal.string(f"_{i}")) 1524 values.append(e) 1525 1526 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1527 1528 @unsupported_args("weight", "accuracy") 1529 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1530 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1531 1532 def alterset_sql(self, expression: exp.AlterSet) -> str: 1533 exprs = self.expressions(expression, flat=True) 1534 exprs = f" {exprs}" if exprs else "" 1535 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1536 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1537 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1538 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1539 tag = self.expressions(expression, key="tag", flat=True) 1540 tag = f" TAG {tag}" if tag else "" 1541 1542 return f"SET{exprs}{file_format}{copy_options}{tag}" 1543 1544 def strtotime_sql(self, expression: exp.StrToTime): 1545 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1546 return self.func( 1547 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1548 ) 1549 1550 def timestampsub_sql(self, expression: exp.TimestampSub): 1551 return self.sql( 1552 exp.TimestampAdd( 1553 this=expression.this, 1554 expression=expression.expression * -1, 1555 unit=expression.unit, 1556 ) 1557 ) 1558 1559 def jsonextract_sql(self, expression: exp.JSONExtract): 1560 this = expression.this 1561 1562 # JSON strings are valid coming from other dialects such as BQ so 1563 # for these cases we PARSE_JSON preemptively 1564 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1565 "requires_json" 1566 ): 1567 this = exp.ParseJSON(this=this) 1568 1569 return self.func( 1570 "GET_PATH", 1571 this, 1572 expression.expression, 1573 ) 1574 1575 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1576 this = expression.this 1577 if this.is_string: 1578 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1579 1580 return self.func("TO_CHAR", this, self.format_time(expression)) 1581 1582 def datesub_sql(self, expression: exp.DateSub) -> str: 1583 value = expression.expression 1584 if value: 1585 value.replace(value * (-1)) 1586 else: 1587 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1588 1589 return date_delta_sql("DATEADD")(self, expression) 1590 1591 def select_sql(self, expression: exp.Select) -> str: 1592 limit = expression.args.get("limit") 1593 offset = expression.args.get("offset") 1594 if offset and not limit: 1595 expression.limit(exp.Null(), copy=False) 1596 return super().select_sql(expression) 1597 1598 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1599 is_materialized = expression.find(exp.MaterializedProperty) 1600 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1601 1602 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1603 # For materialized views, COPY GRANTS is located *before* the columns list 1604 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1605 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1606 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1607 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1608 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1609 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1610 1611 this_name = self.sql(expression.this, "this") 1612 copy_grants = self.sql(copy_grants_property) 1613 this_schema = self.schema_columns_sql(expression.this) 1614 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1615 1616 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1617 1618 return super().createable_sql(expression, locations) 1619 1620 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1621 this = expression.this 1622 1623 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1624 # and add it later as part of the WITHIN GROUP clause 1625 order = this if isinstance(this, exp.Order) else None 1626 if order: 1627 expression.set("this", order.this.pop()) 1628 1629 expr_sql = super().arrayagg_sql(expression) 1630 1631 if order: 1632 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1633 1634 return expr_sql 1635 1636 def array_sql(self, expression: exp.Array) -> str: 1637 expressions = expression.expressions 1638 1639 first_expr = seq_get(expressions, 0) 1640 if isinstance(first_expr, exp.Select): 1641 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1642 if first_expr.text("kind").upper() == "STRUCT": 1643 object_construct_args = [] 1644 for expr in first_expr.expressions: 1645 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1646 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1647 name = expr.this if isinstance(expr, exp.Alias) else expr 1648 1649 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1650 1651 array_agg = exp.ArrayAgg( 1652 this=_build_object_construct(args=object_construct_args) 1653 ) 1654 1655 first_expr.set("kind", None) 1656 first_expr.set("expressions", [array_agg]) 1657 1658 return self.sql(first_expr.subquery()) 1659 1660 return inline_array_sql(self, expression) 1661 1662 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1663 zone = self.sql(expression, "this") 1664 if not zone: 1665 return super().currentdate_sql(expression) 1666 1667 expr = exp.Cast( 1668 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1669 to=exp.DataType(this=exp.DataType.Type.DATE), 1670 ) 1671 return self.sql(expr) 1672 1673 def dot_sql(self, expression: exp.Dot) -> str: 1674 this = expression.this 1675 1676 if not this.type: 1677 from sqlglot.optimizer.annotate_types import annotate_types 1678 1679 this = annotate_types(this, dialect=self.dialect) 1680 1681 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1682 # Generate colon notation for the top level STRUCT 1683 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1684 1685 return super().dot_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
532 def quote_identifier(self, expression: E, identify: bool = True) -> E: 533 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 534 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 535 if ( 536 isinstance(expression, exp.Identifier) 537 and isinstance(expression.parent, exp.Table) 538 and expression.name.lower() == "dual" 539 ): 540 return expression # type: ignore 541 542 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
544 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 545 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 546 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
548 class Parser(parser.Parser): 549 IDENTIFY_PIVOT_STRINGS = True 550 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 551 COLON_IS_VARIANT_EXTRACT = True 552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 553 554 ID_VAR_TOKENS = { 555 *parser.Parser.ID_VAR_TOKENS, 556 TokenType.EXCEPT, 557 TokenType.MATCH_CONDITION, 558 } 559 560 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 561 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 562 563 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 564 565 FUNCTIONS = { 566 **parser.Parser.FUNCTIONS, 567 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 568 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 569 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 570 this=seq_get(args, 1), expression=seq_get(args, 0) 571 ), 572 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 573 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 574 start=seq_get(args, 0), 575 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 576 step=seq_get(args, 2), 577 ), 578 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 579 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 580 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 581 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 582 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 583 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 584 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 585 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 586 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 587 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 588 "DATE_TRUNC": _date_trunc_to_time, 589 "DATEADD": _build_date_time_add(exp.DateAdd), 590 "DATEDIFF": _build_datediff, 591 "DIV0": _build_if_from_div0, 592 "EDITDISTANCE": lambda args: exp.Levenshtein( 593 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 594 ), 595 "FLATTEN": exp.Explode.from_arg_list, 596 "GET": exp.GetExtract.from_arg_list, 597 "GET_PATH": lambda args, dialect: exp.JSONExtract( 598 this=seq_get(args, 0), 599 expression=dialect.to_json_path(seq_get(args, 1)), 600 requires_json=True, 601 ), 602 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 603 "IFF": exp.If.from_arg_list, 604 "LAST_DAY": lambda args: exp.LastDay( 605 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 606 ), 607 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 608 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 609 "NULLIFZERO": _build_if_from_nullifzero, 610 "OBJECT_CONSTRUCT": _build_object_construct, 611 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 612 "REGEXP_REPLACE": _build_regexp_replace, 613 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 614 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 615 "REPLACE": build_replace_with_optional_replacement, 616 "RLIKE": exp.RegexpLike.from_arg_list, 617 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 618 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 619 "TIMEADD": _build_date_time_add(exp.TimeAdd), 620 "TIMEDIFF": _build_datediff, 621 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 622 "TIMESTAMPDIFF": _build_datediff, 623 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 624 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 625 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 626 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 627 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 628 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 629 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 630 "TRY_TO_TIMESTAMP": _build_datetime( 631 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 632 ), 633 "TO_CHAR": build_timetostr_or_tochar, 634 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 635 "TO_NUMBER": lambda args: exp.ToNumber( 636 this=seq_get(args, 0), 637 format=seq_get(args, 1), 638 precision=seq_get(args, 2), 639 scale=seq_get(args, 3), 640 ), 641 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 642 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 643 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 644 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 645 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 646 "TO_VARCHAR": exp.ToChar.from_arg_list, 647 "ZEROIFNULL": _build_if_from_zeroifnull, 648 } 649 650 FUNCTION_PARSERS = { 651 **parser.Parser.FUNCTION_PARSERS, 652 "DATE_PART": lambda self: self._parse_date_part(), 653 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 654 "LISTAGG": lambda self: self._parse_string_agg(), 655 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 656 } 657 FUNCTION_PARSERS.pop("TRIM") 658 659 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 660 661 ALTER_PARSERS = { 662 **parser.Parser.ALTER_PARSERS, 663 "UNSET": lambda self: self.expression( 664 exp.Set, 665 tag=self._match_text_seq("TAG"), 666 expressions=self._parse_csv(self._parse_id_var), 667 unset=True, 668 ), 669 } 670 671 STATEMENT_PARSERS = { 672 **parser.Parser.STATEMENT_PARSERS, 673 TokenType.GET: lambda self: self._parse_get(), 674 TokenType.PUT: lambda self: self._parse_put(), 675 TokenType.SHOW: lambda self: self._parse_show(), 676 } 677 678 PROPERTY_PARSERS = { 679 **parser.Parser.PROPERTY_PARSERS, 680 "CREDENTIALS": lambda self: self._parse_credentials_property(), 681 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 682 "LOCATION": lambda self: self._parse_location_property(), 683 "TAG": lambda self: self._parse_tag(), 684 "USING": lambda self: self._match_text_seq("TEMPLATE") 685 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 686 } 687 688 TYPE_CONVERTERS = { 689 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 690 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 691 } 692 693 SHOW_PARSERS = { 694 "DATABASES": _show_parser("DATABASES"), 695 "TERSE DATABASES": _show_parser("DATABASES"), 696 "SCHEMAS": _show_parser("SCHEMAS"), 697 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 698 "OBJECTS": _show_parser("OBJECTS"), 699 "TERSE OBJECTS": _show_parser("OBJECTS"), 700 "TABLES": _show_parser("TABLES"), 701 "TERSE TABLES": _show_parser("TABLES"), 702 "VIEWS": _show_parser("VIEWS"), 703 "TERSE VIEWS": _show_parser("VIEWS"), 704 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 706 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 708 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 710 "SEQUENCES": _show_parser("SEQUENCES"), 711 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 712 "STAGES": _show_parser("STAGES"), 713 "COLUMNS": _show_parser("COLUMNS"), 714 "USERS": _show_parser("USERS"), 715 "TERSE USERS": _show_parser("USERS"), 716 "FILE FORMATS": _show_parser("FILE FORMATS"), 717 "FUNCTIONS": _show_parser("FUNCTIONS"), 718 "PROCEDURES": _show_parser("PROCEDURES"), 719 "WAREHOUSES": _show_parser("WAREHOUSES"), 720 } 721 722 CONSTRAINT_PARSERS = { 723 **parser.Parser.CONSTRAINT_PARSERS, 724 "WITH": lambda self: self._parse_with_constraint(), 725 "MASKING": lambda self: self._parse_with_constraint(), 726 "PROJECTION": lambda self: self._parse_with_constraint(), 727 "TAG": lambda self: self._parse_with_constraint(), 728 } 729 730 STAGED_FILE_SINGLE_TOKENS = { 731 TokenType.DOT, 732 TokenType.MOD, 733 TokenType.SLASH, 734 } 735 736 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 737 738 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 739 740 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 741 742 LAMBDAS = { 743 **parser.Parser.LAMBDAS, 744 TokenType.ARROW: lambda self, expressions: self.expression( 745 exp.Lambda, 746 this=self._replace_lambda( 747 self._parse_assignment(), 748 expressions, 749 ), 750 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 751 ), 752 } 753 754 def _parse_use(self) -> exp.Use: 755 if self._match_text_seq("SECONDARY", "ROLES"): 756 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 757 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 758 return self.expression( 759 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 760 ) 761 762 return super()._parse_use() 763 764 def _negate_range( 765 self, this: t.Optional[exp.Expression] = None 766 ) -> t.Optional[exp.Expression]: 767 if not this: 768 return this 769 770 query = this.args.get("query") 771 if isinstance(this, exp.In) and isinstance(query, exp.Query): 772 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 773 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 774 # which can produce different results (most likely a SnowFlake bug). 775 # 776 # https://docs.snowflake.com/en/sql-reference/functions/in 777 # Context: https://github.com/tobymao/sqlglot/issues/3890 778 return self.expression( 779 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 780 ) 781 782 return self.expression(exp.Not, this=this) 783 784 def _parse_tag(self) -> exp.Tags: 785 return self.expression( 786 exp.Tags, 787 expressions=self._parse_wrapped_csv(self._parse_property), 788 ) 789 790 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 791 if self._prev.token_type != TokenType.WITH: 792 self._retreat(self._index - 1) 793 794 if self._match_text_seq("MASKING", "POLICY"): 795 policy = self._parse_column() 796 return self.expression( 797 exp.MaskingPolicyColumnConstraint, 798 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 799 expressions=self._match(TokenType.USING) 800 and self._parse_wrapped_csv(self._parse_id_var), 801 ) 802 if self._match_text_seq("PROJECTION", "POLICY"): 803 policy = self._parse_column() 804 return self.expression( 805 exp.ProjectionPolicyColumnConstraint, 806 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 807 ) 808 if self._match(TokenType.TAG): 809 return self._parse_tag() 810 811 return None 812 813 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 814 if self._match(TokenType.TAG): 815 return self._parse_tag() 816 817 return super()._parse_with_property() 818 819 def _parse_create(self) -> exp.Create | exp.Command: 820 expression = super()._parse_create() 821 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 822 # Replace the Table node with the enclosed Identifier 823 expression.this.replace(expression.this.this) 824 825 return expression 826 827 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 828 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 829 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 830 this = self._parse_var() or self._parse_type() 831 832 if not this: 833 return None 834 835 self._match(TokenType.COMMA) 836 expression = self._parse_bitwise() 837 this = map_date_part(this) 838 name = this.name.upper() 839 840 if name.startswith("EPOCH"): 841 if name == "EPOCH_MILLISECOND": 842 scale = 10**3 843 elif name == "EPOCH_MICROSECOND": 844 scale = 10**6 845 elif name == "EPOCH_NANOSECOND": 846 scale = 10**9 847 else: 848 scale = None 849 850 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 851 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 852 853 if scale: 854 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 855 856 return to_unix 857 858 return self.expression(exp.Extract, this=this, expression=expression) 859 860 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 861 if is_map: 862 # Keys are strings in Snowflake's objects, see also: 863 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 864 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 865 return self._parse_slice(self._parse_string()) 866 867 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 868 869 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 870 lateral = super()._parse_lateral() 871 if not lateral: 872 return lateral 873 874 if isinstance(lateral.this, exp.Explode): 875 table_alias = lateral.args.get("alias") 876 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 877 if table_alias and not table_alias.args.get("columns"): 878 table_alias.set("columns", columns) 879 elif not table_alias: 880 exp.alias_(lateral, "_flattened", table=columns, copy=False) 881 882 return lateral 883 884 def _parse_table_parts( 885 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 886 ) -> exp.Table: 887 # https://docs.snowflake.com/en/user-guide/querying-stage 888 if self._match(TokenType.STRING, advance=False): 889 table = self._parse_string() 890 elif self._match_text_seq("@", advance=False): 891 table = self._parse_location_path() 892 else: 893 table = None 894 895 if table: 896 file_format = None 897 pattern = None 898 899 wrapped = self._match(TokenType.L_PAREN) 900 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 901 if self._match_text_seq("FILE_FORMAT", "=>"): 902 file_format = self._parse_string() or super()._parse_table_parts( 903 is_db_reference=is_db_reference 904 ) 905 elif self._match_text_seq("PATTERN", "=>"): 906 pattern = self._parse_string() 907 else: 908 break 909 910 self._match(TokenType.COMMA) 911 912 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 913 else: 914 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 915 916 return table 917 918 def _parse_table( 919 self, 920 schema: bool = False, 921 joins: bool = False, 922 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 923 parse_bracket: bool = False, 924 is_db_reference: bool = False, 925 parse_partition: bool = False, 926 consume_pipe: bool = False, 927 ) -> t.Optional[exp.Expression]: 928 table = super()._parse_table( 929 schema=schema, 930 joins=joins, 931 alias_tokens=alias_tokens, 932 parse_bracket=parse_bracket, 933 is_db_reference=is_db_reference, 934 parse_partition=parse_partition, 935 ) 936 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 937 table_from_rows = table.this 938 for arg in exp.TableFromRows.arg_types: 939 if arg != "this": 940 table_from_rows.set(arg, table.args.get(arg)) 941 942 table = table_from_rows 943 944 return table 945 946 def _parse_id_var( 947 self, 948 any_token: bool = True, 949 tokens: t.Optional[t.Collection[TokenType]] = None, 950 ) -> t.Optional[exp.Expression]: 951 if self._match_text_seq("IDENTIFIER", "("): 952 identifier = ( 953 super()._parse_id_var(any_token=any_token, tokens=tokens) 954 or self._parse_string() 955 ) 956 self._match_r_paren() 957 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 958 959 return super()._parse_id_var(any_token=any_token, tokens=tokens) 960 961 def _parse_show_snowflake(self, this: str) -> exp.Show: 962 scope = None 963 scope_kind = None 964 965 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 966 # which is syntactically valid but has no effect on the output 967 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 968 969 history = self._match_text_seq("HISTORY") 970 971 like = self._parse_string() if self._match(TokenType.LIKE) else None 972 973 if self._match(TokenType.IN): 974 if self._match_text_seq("ACCOUNT"): 975 scope_kind = "ACCOUNT" 976 elif self._match_text_seq("CLASS"): 977 scope_kind = "CLASS" 978 scope = self._parse_table_parts() 979 elif self._match_text_seq("APPLICATION"): 980 scope_kind = "APPLICATION" 981 if self._match_text_seq("PACKAGE"): 982 scope_kind += " PACKAGE" 983 scope = self._parse_table_parts() 984 elif self._match_set(self.DB_CREATABLES): 985 scope_kind = self._prev.text.upper() 986 if self._curr: 987 scope = self._parse_table_parts() 988 elif self._curr: 989 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 990 scope = self._parse_table_parts() 991 992 return self.expression( 993 exp.Show, 994 **{ 995 "terse": terse, 996 "this": this, 997 "history": history, 998 "like": like, 999 "scope": scope, 1000 "scope_kind": scope_kind, 1001 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1002 "limit": self._parse_limit(), 1003 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1004 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1005 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1006 }, 1007 ) 1008 1009 def _parse_put(self) -> exp.Put | exp.Command: 1010 if self._curr.token_type != TokenType.STRING: 1011 return self._parse_as_command(self._prev) 1012 1013 return self.expression( 1014 exp.Put, 1015 this=self._parse_string(), 1016 target=self._parse_location_path(), 1017 properties=self._parse_properties(), 1018 ) 1019 1020 def _parse_get(self) -> t.Optional[exp.Expression]: 1021 start = self._prev 1022 1023 # If we detect GET( then we need to parse a function, not a statement 1024 if self._match(TokenType.L_PAREN): 1025 self._retreat(self._index - 2) 1026 return self._parse_expression() 1027 1028 target = self._parse_location_path() 1029 1030 # Parse as command if unquoted file path 1031 if self._curr.token_type == TokenType.URI_START: 1032 return self._parse_as_command(start) 1033 1034 return self.expression( 1035 exp.Get, 1036 this=self._parse_string(), 1037 target=target, 1038 properties=self._parse_properties(), 1039 ) 1040 1041 def _parse_location_property(self) -> exp.LocationProperty: 1042 self._match(TokenType.EQ) 1043 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1044 1045 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1046 # Parse either a subquery or a staged file 1047 return ( 1048 self._parse_select(table=True, parse_subquery_alias=False) 1049 if self._match(TokenType.L_PAREN, advance=False) 1050 else self._parse_table_parts() 1051 ) 1052 1053 def _parse_location_path(self) -> exp.Var: 1054 start = self._curr 1055 self._advance_any(ignore_reserved=True) 1056 1057 # We avoid consuming a comma token because external tables like @foo and @bar 1058 # can be joined in a query with a comma separator, as well as closing paren 1059 # in case of subqueries 1060 while self._is_connected() and not self._match_set( 1061 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1062 ): 1063 self._advance_any(ignore_reserved=True) 1064 1065 return exp.var(self._find_sql(start, self._prev)) 1066 1067 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1068 this = super()._parse_lambda_arg() 1069 1070 if not this: 1071 return this 1072 1073 typ = self._parse_types() 1074 1075 if typ: 1076 return self.expression(exp.Cast, this=this, to=typ) 1077 1078 return this 1079 1080 def _parse_foreign_key(self) -> exp.ForeignKey: 1081 # inlineFK, the REFERENCES columns are implied 1082 if self._match(TokenType.REFERENCES, advance=False): 1083 return self.expression(exp.ForeignKey) 1084 1085 # outoflineFK, explicitly names the columns 1086 return super()._parse_foreign_key() 1087 1088 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1089 self._match(TokenType.EQ) 1090 if self._match(TokenType.L_PAREN, advance=False): 1091 expressions = self._parse_wrapped_options() 1092 else: 1093 expressions = [self._parse_format_name()] 1094 1095 return self.expression( 1096 exp.FileFormatProperty, 1097 expressions=expressions, 1098 ) 1099 1100 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1101 return self.expression( 1102 exp.CredentialsProperty, 1103 expressions=self._parse_wrapped_options(), 1104 ) 1105 1106 def _parse_semantic_view(self) -> exp.SemanticView: 1107 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1108 1109 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1110 if self._match_text_seq("DIMENSIONS"): 1111 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1112 if self._match_text_seq("METRICS"): 1113 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1114 if self._match_text_seq("WHERE"): 1115 kwargs["where"] = self._parse_expression() 1116 1117 return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1119 class Tokenizer(tokens.Tokenizer): 1120 STRING_ESCAPES = ["\\", "'"] 1121 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1122 RAW_STRINGS = ["$$"] 1123 COMMENTS = ["--", "//", ("/*", "*/")] 1124 NESTED_COMMENTS = False 1125 1126 KEYWORDS = { 1127 **tokens.Tokenizer.KEYWORDS, 1128 "FILE://": TokenType.URI_START, 1129 "BYTEINT": TokenType.INT, 1130 "EXCLUDE": TokenType.EXCEPT, 1131 "FILE FORMAT": TokenType.FILE_FORMAT, 1132 "GET": TokenType.GET, 1133 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1134 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1135 "MINUS": TokenType.EXCEPT, 1136 "NCHAR VARYING": TokenType.VARCHAR, 1137 "PUT": TokenType.PUT, 1138 "REMOVE": TokenType.COMMAND, 1139 "RM": TokenType.COMMAND, 1140 "SAMPLE": TokenType.TABLE_SAMPLE, 1141 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1142 "SQL_DOUBLE": TokenType.DOUBLE, 1143 "SQL_VARCHAR": TokenType.VARCHAR, 1144 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1145 "TAG": TokenType.TAG, 1146 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1147 "TOP": TokenType.TOP, 1148 "WAREHOUSE": TokenType.WAREHOUSE, 1149 "STAGE": TokenType.STAGE, 1150 "STREAMLIT": TokenType.STREAMLIT, 1151 } 1152 KEYWORDS.pop("/*+") 1153 1154 SINGLE_TOKENS = { 1155 **tokens.Tokenizer.SINGLE_TOKENS, 1156 "$": TokenType.PARAMETER, 1157 } 1158 1159 VAR_SINGLE_TOKENS = {"$"} 1160 1161 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1163 class Generator(generator.Generator): 1164 PARAMETER_TOKEN = "$" 1165 MATCHED_BY_SOURCE = False 1166 SINGLE_STRING_INTERVAL = True 1167 JOIN_HINTS = False 1168 TABLE_HINTS = False 1169 QUERY_HINTS = False 1170 AGGREGATE_FILTER_SUPPORTED = False 1171 SUPPORTS_TABLE_COPY = False 1172 COLLATE_IS_FUNC = True 1173 LIMIT_ONLY_LITERALS = True 1174 JSON_KEY_VALUE_PAIR_SEP = "," 1175 INSERT_OVERWRITE = " OVERWRITE INTO" 1176 STRUCT_DELIMITER = ("(", ")") 1177 COPY_PARAMS_ARE_WRAPPED = False 1178 COPY_PARAMS_EQ_REQUIRED = True 1179 STAR_EXCEPT = "EXCLUDE" 1180 SUPPORTS_EXPLODING_PROJECTIONS = False 1181 ARRAY_CONCAT_IS_VAR_LEN = False 1182 SUPPORTS_CONVERT_TIMEZONE = True 1183 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1184 SUPPORTS_MEDIAN = True 1185 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1186 SUPPORTS_DECODE_CASE = True 1187 IS_BOOL_ALLOWED = False 1188 1189 TRANSFORMS = { 1190 **generator.Generator.TRANSFORMS, 1191 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1192 exp.ArgMax: rename_func("MAX_BY"), 1193 exp.ArgMin: rename_func("MIN_BY"), 1194 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1195 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1196 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1197 exp.AtTimeZone: lambda self, e: self.func( 1198 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1199 ), 1200 exp.BitwiseOr: rename_func("BITOR"), 1201 exp.BitwiseXor: rename_func("BITXOR"), 1202 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1203 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1204 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1205 exp.DateAdd: date_delta_sql("DATEADD"), 1206 exp.DateDiff: date_delta_sql("DATEDIFF"), 1207 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1208 exp.DatetimeDiff: timestampdiff_sql, 1209 exp.DateStrToDate: datestrtodate_sql, 1210 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1211 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1212 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1213 exp.DayOfYear: rename_func("DAYOFYEAR"), 1214 exp.Explode: rename_func("FLATTEN"), 1215 exp.Extract: lambda self, e: self.func( 1216 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1217 ), 1218 exp.FileFormatProperty: lambda self, 1219 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1220 exp.FromTimeZone: lambda self, e: self.func( 1221 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1222 ), 1223 exp.GenerateSeries: lambda self, e: self.func( 1224 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1225 ), 1226 exp.GetExtract: rename_func("GET"), 1227 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1228 exp.If: if_sql(name="IFF", false_value="NULL"), 1229 exp.JSONExtractArray: _json_extract_value_array_sql, 1230 exp.JSONExtractScalar: lambda self, e: self.func( 1231 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1232 ), 1233 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1234 exp.JSONPathRoot: lambda *_: "", 1235 exp.JSONValueArray: _json_extract_value_array_sql, 1236 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1237 rename_func("EDITDISTANCE") 1238 ), 1239 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1240 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1241 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1242 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1243 exp.MakeInterval: no_make_interval_sql, 1244 exp.Max: max_or_greatest, 1245 exp.Min: min_or_least, 1246 exp.ParseJSON: lambda self, e: self.func( 1247 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1248 ), 1249 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1250 exp.PercentileCont: transforms.preprocess( 1251 [transforms.add_within_group_for_percentiles] 1252 ), 1253 exp.PercentileDisc: transforms.preprocess( 1254 [transforms.add_within_group_for_percentiles] 1255 ), 1256 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1257 exp.RegexpExtract: _regexpextract_sql, 1258 exp.RegexpExtractAll: _regexpextract_sql, 1259 exp.RegexpILike: _regexpilike_sql, 1260 exp.Rand: rename_func("RANDOM"), 1261 exp.Select: transforms.preprocess( 1262 [ 1263 transforms.eliminate_window_clause, 1264 transforms.eliminate_distinct_on, 1265 transforms.explode_projection_to_unnest(), 1266 transforms.eliminate_semi_and_anti_joins, 1267 _transform_generate_date_array, 1268 _qualify_unnested_columns, 1269 _eliminate_dot_variant_lookup, 1270 ] 1271 ), 1272 exp.SHA: rename_func("SHA1"), 1273 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1274 exp.StartsWith: rename_func("STARTSWITH"), 1275 exp.EndsWith: rename_func("ENDSWITH"), 1276 exp.StrPosition: lambda self, e: strposition_sql( 1277 self, e, func_name="CHARINDEX", supports_position=True 1278 ), 1279 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1280 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1281 exp.Stuff: rename_func("INSERT"), 1282 exp.StPoint: rename_func("ST_MAKEPOINT"), 1283 exp.TimeAdd: date_delta_sql("TIMEADD"), 1284 exp.Timestamp: no_timestamp_sql, 1285 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1286 exp.TimestampDiff: lambda self, e: self.func( 1287 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1288 ), 1289 exp.TimestampTrunc: timestamptrunc_sql(), 1290 exp.TimeStrToTime: timestrtotime_sql, 1291 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1292 exp.ToArray: rename_func("TO_ARRAY"), 1293 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1294 exp.ToDouble: rename_func("TO_DOUBLE"), 1295 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1296 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1297 exp.TsOrDsToDate: lambda self, e: self.func( 1298 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1299 ), 1300 exp.TsOrDsToTime: lambda self, e: self.func( 1301 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1302 ), 1303 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1304 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1305 exp.Uuid: rename_func("UUID_STRING"), 1306 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1307 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1308 exp.Xor: rename_func("BOOLXOR"), 1309 } 1310 1311 SUPPORTED_JSON_PATH_PARTS = { 1312 exp.JSONPathKey, 1313 exp.JSONPathRoot, 1314 exp.JSONPathSubscript, 1315 } 1316 1317 TYPE_MAPPING = { 1318 **generator.Generator.TYPE_MAPPING, 1319 exp.DataType.Type.NESTED: "OBJECT", 1320 exp.DataType.Type.STRUCT: "OBJECT", 1321 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1322 } 1323 1324 TOKEN_MAPPING = { 1325 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1326 } 1327 1328 PROPERTIES_LOCATION = { 1329 **generator.Generator.PROPERTIES_LOCATION, 1330 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1331 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1332 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1333 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1334 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1335 } 1336 1337 UNSUPPORTED_VALUES_EXPRESSIONS = { 1338 exp.Map, 1339 exp.StarMap, 1340 exp.Struct, 1341 exp.VarMap, 1342 } 1343 1344 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1345 1346 def with_properties(self, properties: exp.Properties) -> str: 1347 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1348 1349 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1350 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1351 values_as_table = False 1352 1353 return super().values_sql(expression, values_as_table=values_as_table) 1354 1355 def datatype_sql(self, expression: exp.DataType) -> str: 1356 expressions = expression.expressions 1357 if ( 1358 expressions 1359 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1360 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1361 ): 1362 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1363 return "OBJECT" 1364 1365 return super().datatype_sql(expression) 1366 1367 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1368 return self.func( 1369 "TO_NUMBER", 1370 expression.this, 1371 expression.args.get("format"), 1372 expression.args.get("precision"), 1373 expression.args.get("scale"), 1374 ) 1375 1376 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1377 milli = expression.args.get("milli") 1378 if milli is not None: 1379 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1380 expression.set("nano", milli_to_nano) 1381 1382 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1383 1384 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1385 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1386 return self.func("TO_GEOGRAPHY", expression.this) 1387 if expression.is_type(exp.DataType.Type.GEOMETRY): 1388 return self.func("TO_GEOMETRY", expression.this) 1389 1390 return super().cast_sql(expression, safe_prefix=safe_prefix) 1391 1392 def trycast_sql(self, expression: exp.TryCast) -> str: 1393 value = expression.this 1394 1395 if value.type is None: 1396 from sqlglot.optimizer.annotate_types import annotate_types 1397 1398 value = annotate_types(value, dialect=self.dialect) 1399 1400 # Snowflake requires that TRY_CAST's value be a string 1401 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1402 # if we can deduce that the value is a string, then we can generate TRY_CAST 1403 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1404 return super().trycast_sql(expression) 1405 1406 return self.cast_sql(expression) 1407 1408 def log_sql(self, expression: exp.Log) -> str: 1409 if not expression.expression: 1410 return self.func("LN", expression.this) 1411 1412 return super().log_sql(expression) 1413 1414 def unnest_sql(self, expression: exp.Unnest) -> str: 1415 unnest_alias = expression.args.get("alias") 1416 offset = expression.args.get("offset") 1417 1418 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1419 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1420 1421 columns = [ 1422 exp.to_identifier("seq"), 1423 exp.to_identifier("key"), 1424 exp.to_identifier("path"), 1425 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1426 value, 1427 exp.to_identifier("this"), 1428 ] 1429 1430 if unnest_alias: 1431 unnest_alias.set("columns", columns) 1432 else: 1433 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1434 1435 table_input = self.sql(expression.expressions[0]) 1436 if not table_input.startswith("INPUT =>"): 1437 table_input = f"INPUT => {table_input}" 1438 1439 expression_parent = expression.parent 1440 1441 explode = ( 1442 f"FLATTEN({table_input})" 1443 if isinstance(expression_parent, exp.Lateral) 1444 else f"TABLE(FLATTEN({table_input}))" 1445 ) 1446 alias = self.sql(unnest_alias) 1447 alias = f" AS {alias}" if alias else "" 1448 value = ( 1449 "" 1450 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1451 else f"{value} FROM " 1452 ) 1453 1454 return f"{value}{explode}{alias}" 1455 1456 def show_sql(self, expression: exp.Show) -> str: 1457 terse = "TERSE " if expression.args.get("terse") else "" 1458 history = " HISTORY" if expression.args.get("history") else "" 1459 like = self.sql(expression, "like") 1460 like = f" LIKE {like}" if like else "" 1461 1462 scope = self.sql(expression, "scope") 1463 scope = f" {scope}" if scope else "" 1464 1465 scope_kind = self.sql(expression, "scope_kind") 1466 if scope_kind: 1467 scope_kind = f" IN {scope_kind}" 1468 1469 starts_with = self.sql(expression, "starts_with") 1470 if starts_with: 1471 starts_with = f" STARTS WITH {starts_with}" 1472 1473 limit = self.sql(expression, "limit") 1474 1475 from_ = self.sql(expression, "from") 1476 if from_: 1477 from_ = f" FROM {from_}" 1478 1479 privileges = self.expressions(expression, key="privileges", flat=True) 1480 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1481 1482 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1483 1484 def describe_sql(self, expression: exp.Describe) -> str: 1485 # Default to table if kind is unknown 1486 kind_value = expression.args.get("kind") or "TABLE" 1487 kind = f" {kind_value}" if kind_value else "" 1488 this = f" {self.sql(expression, 'this')}" 1489 expressions = self.expressions(expression, flat=True) 1490 expressions = f" {expressions}" if expressions else "" 1491 return f"DESCRIBE{kind}{this}{expressions}" 1492 1493 def generatedasidentitycolumnconstraint_sql( 1494 self, expression: exp.GeneratedAsIdentityColumnConstraint 1495 ) -> str: 1496 start = expression.args.get("start") 1497 start = f" START {start}" if start else "" 1498 increment = expression.args.get("increment") 1499 increment = f" INCREMENT {increment}" if increment else "" 1500 1501 order = expression.args.get("order") 1502 if order is not None: 1503 order_clause = " ORDER" if order else " NOORDER" 1504 else: 1505 order_clause = "" 1506 1507 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1508 1509 def cluster_sql(self, expression: exp.Cluster) -> str: 1510 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1511 1512 def struct_sql(self, expression: exp.Struct) -> str: 1513 keys = [] 1514 values = [] 1515 1516 for i, e in enumerate(expression.expressions): 1517 if isinstance(e, exp.PropertyEQ): 1518 keys.append( 1519 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1520 ) 1521 values.append(e.expression) 1522 else: 1523 keys.append(exp.Literal.string(f"_{i}")) 1524 values.append(e) 1525 1526 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1527 1528 @unsupported_args("weight", "accuracy") 1529 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1530 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1531 1532 def alterset_sql(self, expression: exp.AlterSet) -> str: 1533 exprs = self.expressions(expression, flat=True) 1534 exprs = f" {exprs}" if exprs else "" 1535 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1536 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1537 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1538 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1539 tag = self.expressions(expression, key="tag", flat=True) 1540 tag = f" TAG {tag}" if tag else "" 1541 1542 return f"SET{exprs}{file_format}{copy_options}{tag}" 1543 1544 def strtotime_sql(self, expression: exp.StrToTime): 1545 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1546 return self.func( 1547 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1548 ) 1549 1550 def timestampsub_sql(self, expression: exp.TimestampSub): 1551 return self.sql( 1552 exp.TimestampAdd( 1553 this=expression.this, 1554 expression=expression.expression * -1, 1555 unit=expression.unit, 1556 ) 1557 ) 1558 1559 def jsonextract_sql(self, expression: exp.JSONExtract): 1560 this = expression.this 1561 1562 # JSON strings are valid coming from other dialects such as BQ so 1563 # for these cases we PARSE_JSON preemptively 1564 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1565 "requires_json" 1566 ): 1567 this = exp.ParseJSON(this=this) 1568 1569 return self.func( 1570 "GET_PATH", 1571 this, 1572 expression.expression, 1573 ) 1574 1575 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1576 this = expression.this 1577 if this.is_string: 1578 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1579 1580 return self.func("TO_CHAR", this, self.format_time(expression)) 1581 1582 def datesub_sql(self, expression: exp.DateSub) -> str: 1583 value = expression.expression 1584 if value: 1585 value.replace(value * (-1)) 1586 else: 1587 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1588 1589 return date_delta_sql("DATEADD")(self, expression) 1590 1591 def select_sql(self, expression: exp.Select) -> str: 1592 limit = expression.args.get("limit") 1593 offset = expression.args.get("offset") 1594 if offset and not limit: 1595 expression.limit(exp.Null(), copy=False) 1596 return super().select_sql(expression) 1597 1598 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1599 is_materialized = expression.find(exp.MaterializedProperty) 1600 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1601 1602 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1603 # For materialized views, COPY GRANTS is located *before* the columns list 1604 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1605 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1606 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1607 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1608 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1609 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1610 1611 this_name = self.sql(expression.this, "this") 1612 copy_grants = self.sql(copy_grants_property) 1613 this_schema = self.schema_columns_sql(expression.this) 1614 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1615 1616 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1617 1618 return super().createable_sql(expression, locations) 1619 1620 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1621 this = expression.this 1622 1623 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1624 # and add it later as part of the WITHIN GROUP clause 1625 order = this if isinstance(this, exp.Order) else None 1626 if order: 1627 expression.set("this", order.this.pop()) 1628 1629 expr_sql = super().arrayagg_sql(expression) 1630 1631 if order: 1632 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1633 1634 return expr_sql 1635 1636 def array_sql(self, expression: exp.Array) -> str: 1637 expressions = expression.expressions 1638 1639 first_expr = seq_get(expressions, 0) 1640 if isinstance(first_expr, exp.Select): 1641 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1642 if first_expr.text("kind").upper() == "STRUCT": 1643 object_construct_args = [] 1644 for expr in first_expr.expressions: 1645 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1646 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1647 name = expr.this if isinstance(expr, exp.Alias) else expr 1648 1649 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1650 1651 array_agg = exp.ArrayAgg( 1652 this=_build_object_construct(args=object_construct_args) 1653 ) 1654 1655 first_expr.set("kind", None) 1656 first_expr.set("expressions", [array_agg]) 1657 1658 return self.sql(first_expr.subquery()) 1659 1660 return inline_array_sql(self, expression) 1661 1662 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1663 zone = self.sql(expression, "this") 1664 if not zone: 1665 return super().currentdate_sql(expression) 1666 1667 expr = exp.Cast( 1668 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1669 to=exp.DataType(this=exp.DataType.Type.DATE), 1670 ) 1671 return self.sql(expr) 1672 1673 def dot_sql(self, expression: exp.Dot) -> str: 1674 this = expression.this 1675 1676 if not this.type: 1677 from sqlglot.optimizer.annotate_types import annotate_types 1678 1679 this = annotate_types(this, dialect=self.dialect) 1680 1681 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1682 # Generate colon notation for the top level STRUCT 1683 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1684 1685 return super().dot_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1355 def datatype_sql(self, expression: exp.DataType) -> str: 1356 expressions = expression.expressions 1357 if ( 1358 expressions 1359 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1360 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1361 ): 1362 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1363 return "OBJECT" 1364 1365 return super().datatype_sql(expression)
1376 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1377 milli = expression.args.get("milli") 1378 if milli is not None: 1379 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1380 expression.set("nano", milli_to_nano) 1381 1382 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1384 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1385 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1386 return self.func("TO_GEOGRAPHY", expression.this) 1387 if expression.is_type(exp.DataType.Type.GEOMETRY): 1388 return self.func("TO_GEOMETRY", expression.this) 1389 1390 return super().cast_sql(expression, safe_prefix=safe_prefix)
1392 def trycast_sql(self, expression: exp.TryCast) -> str: 1393 value = expression.this 1394 1395 if value.type is None: 1396 from sqlglot.optimizer.annotate_types import annotate_types 1397 1398 value = annotate_types(value, dialect=self.dialect) 1399 1400 # Snowflake requires that TRY_CAST's value be a string 1401 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1402 # if we can deduce that the value is a string, then we can generate TRY_CAST 1403 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1404 return super().trycast_sql(expression) 1405 1406 return self.cast_sql(expression)
1414 def unnest_sql(self, expression: exp.Unnest) -> str: 1415 unnest_alias = expression.args.get("alias") 1416 offset = expression.args.get("offset") 1417 1418 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1419 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1420 1421 columns = [ 1422 exp.to_identifier("seq"), 1423 exp.to_identifier("key"), 1424 exp.to_identifier("path"), 1425 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1426 value, 1427 exp.to_identifier("this"), 1428 ] 1429 1430 if unnest_alias: 1431 unnest_alias.set("columns", columns) 1432 else: 1433 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1434 1435 table_input = self.sql(expression.expressions[0]) 1436 if not table_input.startswith("INPUT =>"): 1437 table_input = f"INPUT => {table_input}" 1438 1439 expression_parent = expression.parent 1440 1441 explode = ( 1442 f"FLATTEN({table_input})" 1443 if isinstance(expression_parent, exp.Lateral) 1444 else f"TABLE(FLATTEN({table_input}))" 1445 ) 1446 alias = self.sql(unnest_alias) 1447 alias = f" AS {alias}" if alias else "" 1448 value = ( 1449 "" 1450 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1451 else f"{value} FROM " 1452 ) 1453 1454 return f"{value}{explode}{alias}"
1456 def show_sql(self, expression: exp.Show) -> str: 1457 terse = "TERSE " if expression.args.get("terse") else "" 1458 history = " HISTORY" if expression.args.get("history") else "" 1459 like = self.sql(expression, "like") 1460 like = f" LIKE {like}" if like else "" 1461 1462 scope = self.sql(expression, "scope") 1463 scope = f" {scope}" if scope else "" 1464 1465 scope_kind = self.sql(expression, "scope_kind") 1466 if scope_kind: 1467 scope_kind = f" IN {scope_kind}" 1468 1469 starts_with = self.sql(expression, "starts_with") 1470 if starts_with: 1471 starts_with = f" STARTS WITH {starts_with}" 1472 1473 limit = self.sql(expression, "limit") 1474 1475 from_ = self.sql(expression, "from") 1476 if from_: 1477 from_ = f" FROM {from_}" 1478 1479 privileges = self.expressions(expression, key="privileges", flat=True) 1480 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1481 1482 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
1484 def describe_sql(self, expression: exp.Describe) -> str: 1485 # Default to table if kind is unknown 1486 kind_value = expression.args.get("kind") or "TABLE" 1487 kind = f" {kind_value}" if kind_value else "" 1488 this = f" {self.sql(expression, 'this')}" 1489 expressions = self.expressions(expression, flat=True) 1490 expressions = f" {expressions}" if expressions else "" 1491 return f"DESCRIBE{kind}{this}{expressions}"
1493 def generatedasidentitycolumnconstraint_sql( 1494 self, expression: exp.GeneratedAsIdentityColumnConstraint 1495 ) -> str: 1496 start = expression.args.get("start") 1497 start = f" START {start}" if start else "" 1498 increment = expression.args.get("increment") 1499 increment = f" INCREMENT {increment}" if increment else "" 1500 1501 order = expression.args.get("order") 1502 if order is not None: 1503 order_clause = " ORDER" if order else " NOORDER" 1504 else: 1505 order_clause = "" 1506 1507 return f"AUTOINCREMENT{start}{increment}{order_clause}"
1512 def struct_sql(self, expression: exp.Struct) -> str: 1513 keys = [] 1514 values = [] 1515 1516 for i, e in enumerate(expression.expressions): 1517 if isinstance(e, exp.PropertyEQ): 1518 keys.append( 1519 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1520 ) 1521 values.append(e.expression) 1522 else: 1523 keys.append(exp.Literal.string(f"_{i}")) 1524 values.append(e) 1525 1526 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1532 def alterset_sql(self, expression: exp.AlterSet) -> str: 1533 exprs = self.expressions(expression, flat=True) 1534 exprs = f" {exprs}" if exprs else "" 1535 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1536 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1537 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1538 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1539 tag = self.expressions(expression, key="tag", flat=True) 1540 tag = f" TAG {tag}" if tag else "" 1541 1542 return f"SET{exprs}{file_format}{copy_options}{tag}"
1559 def jsonextract_sql(self, expression: exp.JSONExtract): 1560 this = expression.this 1561 1562 # JSON strings are valid coming from other dialects such as BQ so 1563 # for these cases we PARSE_JSON preemptively 1564 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1565 "requires_json" 1566 ): 1567 this = exp.ParseJSON(this=this) 1568 1569 return self.func( 1570 "GET_PATH", 1571 this, 1572 expression.expression, 1573 )
1582 def datesub_sql(self, expression: exp.DateSub) -> str: 1583 value = expression.expression 1584 if value: 1585 value.replace(value * (-1)) 1586 else: 1587 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1588 1589 return date_delta_sql("DATEADD")(self, expression)
1598 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1599 is_materialized = expression.find(exp.MaterializedProperty) 1600 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1601 1602 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1603 # For materialized views, COPY GRANTS is located *before* the columns list 1604 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1605 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1606 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1607 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1608 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1609 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1610 1611 this_name = self.sql(expression.this, "this") 1612 copy_grants = self.sql(copy_grants_property) 1613 this_schema = self.schema_columns_sql(expression.this) 1614 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1615 1616 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1617 1618 return super().createable_sql(expression, locations)
1620 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1621 this = expression.this 1622 1623 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1624 # and add it later as part of the WITHIN GROUP clause 1625 order = this if isinstance(this, exp.Order) else None 1626 if order: 1627 expression.set("this", order.this.pop()) 1628 1629 expr_sql = super().arrayagg_sql(expression) 1630 1631 if order: 1632 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1633 1634 return expr_sql
1636 def array_sql(self, expression: exp.Array) -> str: 1637 expressions = expression.expressions 1638 1639 first_expr = seq_get(expressions, 0) 1640 if isinstance(first_expr, exp.Select): 1641 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1642 if first_expr.text("kind").upper() == "STRUCT": 1643 object_construct_args = [] 1644 for expr in first_expr.expressions: 1645 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1646 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1647 name = expr.this if isinstance(expr, exp.Alias) else expr 1648 1649 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1650 1651 array_agg = exp.ArrayAgg( 1652 this=_build_object_construct(args=object_construct_args) 1653 ) 1654 1655 first_expr.set("kind", None) 1656 first_expr.set("expressions", [array_agg]) 1657 1658 return self.sql(first_expr.subquery()) 1659 1660 return inline_array_sql(self, expression)
1662 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1663 zone = self.sql(expression, "this") 1664 if not zone: 1665 return super().currentdate_sql(expression) 1666 1667 expr = exp.Cast( 1668 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1669 to=exp.DataType(this=exp.DataType.Type.DATE), 1670 ) 1671 return self.sql(expr)
1673 def dot_sql(self, expression: exp.Dot) -> str: 1674 this = expression.this 1675 1676 if not this.type: 1677 from sqlglot.optimizer.annotate_types import annotate_types 1678 1679 this = annotate_types(this, dialect=self.dialect) 1680 1681 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1682 # Generate colon notation for the top level STRUCT 1683 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1684 1685 return super().dot_sql(expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql