sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    array_append_sql,
    array_concat_sql,
    build_timetostr_or_tochar,
    build_like,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_str,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_date_unit, is_int, seq_get
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.typing.snowflake import EXPRESSION_METADATA

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# Timestamp types used in _build_datetime, mapping each TIMESTAMP flavor to the
# corresponding Snowflake conversion function name.
TIMESTAMP_TYPES = {
    exp.DataType.Type.TIMESTAMP: "TO_TIMESTAMP",
    exp.DataType.Type.TIMESTAMPLTZ: "TO_TIMESTAMP_LTZ",
    exp.DataType.Type.TIMESTAMPNTZ: "TO_TIMESTAMP_NTZ",
    exp.DataType.Type.TIMESTAMPTZ: "TO_TIMESTAMP_TZ",
}


def _build_strtok(args: t.List) -> exp.SplitPart:
    """Build a SplitPart node from STRTOK args, filling in Snowflake's documented defaults."""
    # Add default delimiter (space) if missing - per Snowflake docs
    if len(args) == 1:
        args.append(exp.Literal.string(" "))

    # Add default part_index (1) if missing
    if len(args) == 2:
        args.append(exp.Literal.number(1))

    return exp.SplitPart.from_arg_list(args)


def _build_approx_top_k(args: t.List) -> exp.ApproxTopK:
    """
    Normalizes APPROX_TOP_K arguments to match Snowflake semantics.

    Snowflake APPROX_TOP_K signature: APPROX_TOP_K(column [, k] [, counters])
    - k defaults to 1 if omitted (per Snowflake documentation)
    - counters is optional precision parameter
    """
    # Add default k=1 if only column is provided
    if len(args) == 1:
        args.append(exp.Literal.number(1))

    return exp.ApproxTopK.from_arg_list(args)


def _build_date_from_parts(args: t.List) -> exp.DateFromParts:
    """Build DateFromParts; `allow_overflow` marks Snowflake's out-of-range part handling."""
    return exp.DateFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        allow_overflow=True,
    )


def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    """Return a builder for TO_DATE / TO_TIME / TO_TIMESTAMP* (and TRY_ variants when
    `safe` is True), normalizing the call into a cast, UnixToTime, StrToTime, or a
    TsOrDsToDate/TsOrDsToTime node depending on the argument shapes. Falls back to
    an Anonymous function call named `name` when no special case applies."""

    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        # Second argument is ambiguous: an integer means a scale, a string a format
        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, (exp.Literal, exp.Neg)) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind in TIMESTAMP_TYPES:
                if not safe and (int_scale_or_fmt or (int_value and scale_or_fmt is None)):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable. Also, numeric-looking strings with
                    # format strings (e.g., TO_TIMESTAMP('20240115', 'YYYYMMDD')) should
                    # use StrToTime, not UnixToTime.
                    unix_expr = exp.UnixToTime(this=value, scale=scale_or_fmt)
                    unix_expr.set("target_type", exp.DataType.build(kind, dialect="snowflake"))
                    return unix_expr
                if scale_or_fmt and not int_scale_or_fmt:
                    # Format string provided (e.g., 'YYYY-MM-DD'), use StrToTime
                    strtotime_expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    strtotime_expr.set("safe", safe)
                    strtotime_expr.set("target_type", exp.DataType.build(kind, dialect="snowflake"))
                    return strtotime_expr

            # Handle DATE/TIME with format strings - allow int_value if a format string is provided
            has_format_string = scale_or_fmt and not int_scale_or_fmt
            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and (
                not int_value or has_format_string
            ):
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Build OBJECT_CONSTRUCT: a StarMap for OBJECT_CONSTRUCT(*), otherwise a Struct
    of key/value pairs."""
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    """Build DateDiff from DATEDIFF(unit, start, end); note `this` is the *end* date
    and `expression` the start, matching sqlglot's DateDiff convention."""
    return exp.DateDiff(
        this=seq_get(args, 2),
        expression=seq_get(args, 1),
        unit=map_date_part(seq_get(args, 0)),
        date_part_boundary=True,
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Return a builder for DATEADD/TIMEADD-style calls of shape (unit, amount, target)."""

    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    """Return a builder for Snowflake's BITAND/BITOR/... functions. A third argument
    is the padside for AND/OR/XOR; other three-arg calls stay as Anonymous."""

    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            # Special handling for bitwise operations with padside argument
            if expr_type in (exp.BitwiseAnd, exp.BitwiseOr, exp.BitwiseXor):
                return expr_type(
                    this=seq_get(args, 0), expression=seq_get(args, 1), padside=seq_get(args, 2)
                )
            return exp.Anonymous(this=name, expressions=args)

        result = binary_from_function(expr_type)(args)

        # Snowflake specifies INT128 for bitwise shifts
        if expr_type in (exp.BitwiseLeftShift, exp.BitwiseRightShift):
            result.set("requires_int128", True)

        return result

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    """Rewrite DIV0(a, b) as IF(b = 0 AND a IS NOT NULL, 0, a / b)."""
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/div0null
def _build_if_from_div0null(args: t.List) -> exp.If:
    """Rewrite DIV0NULL(a, b) as IF(b = 0 OR b IS NULL, 0, a / b)."""
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    # Returns 0 when divisor is 0 OR NULL
    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).or_(
        exp.Is(this=rhs, expression=exp.null())
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    """Rewrite ZEROIFNULL(x) as IF(x IS NULL, 0, x)."""
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))
def _build_search(args: t.List) -> exp.Search:
    """Build a Search node; trailing Kwarg arguments become named Search args."""
    kwargs = {
        "this": seq_get(args, 0),
        "expression": seq_get(args, 1),
        **{arg.name.lower(): arg for arg in args[2:] if isinstance(arg, exp.Kwarg)},
    }
    return exp.Search(**kwargs)


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    """Rewrite NULLIFZERO(x) as IF(x = 0, NULL, x)."""
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Generate REGEXP_LIKE, ensuring the case-insensitive 'i' flag is present."""
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    """Build RegexpReplace, defaulting the replacement to the empty string."""
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Return a parser callback that forwards its arguments to _parse_show_snowflake."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    """Build DATE_TRUNC, normalizing the date part and flagging the combinations of
    node type / unit / input type for which the input type is preserved."""
    trunc = date_trunc_to_time(args)
    unit = map_date_part(trunc.args["unit"])
    trunc.set("unit", unit)
    is_time_input = trunc.this.is_type(exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ)
    if (isinstance(trunc, exp.TimestampTrunc) and is_date_unit(unit) or is_time_input) or (
        isinstance(trunc, exp.DateTrunc) and not is_date_unit(unit)
    ):
        trunc.set("input_type_preserved", True)
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    """Strip the inner type parameters from nested types in a CREATE statement's
    schema, unless the table carries an IcebergProperty (Iceberg keeps them)."""
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        # Drop e.g. the element types of ARRAY/OBJECT so only the outer type remains
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression
"1": 335 return 336 337 unit = step.args.get("unit") 338 339 unnest_alias = unnest.args.get("alias") 340 if unnest_alias: 341 unnest_alias = unnest_alias.copy() 342 sequence_value_name = seq_get(unnest_alias.columns, 0) or "value" 343 else: 344 sequence_value_name = "value" 345 346 # We'll add the next sequence value to the starting date and project the result 347 date_add = _build_date_time_add(exp.DateAdd)( 348 [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")] 349 ) 350 351 # We use DATEDIFF to compute the number of sequence values needed 352 number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"]( 353 [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1] 354 ) 355 356 unnest.set("expressions", [number_sequence]) 357 358 unnest_parent = unnest.parent 359 if isinstance(unnest_parent, exp.Join): 360 select = unnest_parent.parent 361 if isinstance(select, exp.Select): 362 replace_column_name = ( 363 sequence_value_name 364 if isinstance(sequence_value_name, str) 365 else sequence_value_name.name 366 ) 367 368 scope = build_scope(select) 369 if scope: 370 for column in scope.columns: 371 if column.name.lower() == replace_column_name.lower(): 372 column.replace( 373 date_add.as_(replace_column_name) 374 if isinstance(column.parent, exp.Select) 375 else date_add 376 ) 377 378 lateral = exp.Lateral(this=unnest_parent.this.pop()) 379 unnest_parent.replace(exp.Join(this=lateral)) 380 else: 381 unnest.replace( 382 exp.select(date_add.as_(sequence_value_name)) 383 .from_(unnest.copy()) 384 .subquery(unnest_alias) 385 ) 386 387 388def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression: 389 if isinstance(expression, exp.Select): 390 for generate_date_array in expression.find_all(exp.GenerateDateArray): 391 parent = generate_date_array.parent 392 393 # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake 394 # query is the following (it'll be unnested 
properly on the next iteration due to copy): 395 # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...)))) 396 if not isinstance(parent, exp.Unnest): 397 unnest = exp.Unnest(expressions=[generate_date_array.copy()]) 398 generate_date_array.replace( 399 exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery() 400 ) 401 402 if ( 403 isinstance(parent, exp.Unnest) 404 and isinstance(parent.parent, (exp.From, exp.Join)) 405 and len(parent.expressions) == 1 406 ): 407 _unnest_generate_date_array(parent) 408 409 return expression 410 411 412def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Snowflake], E]: 413 def _builder(args: t.List, dialect: Snowflake) -> E: 414 return expr_type( 415 this=seq_get(args, 0), 416 expression=seq_get(args, 1), 417 position=seq_get(args, 2), 418 occurrence=seq_get(args, 3), 419 parameters=seq_get(args, 4), 420 group=seq_get(args, 5) or exp.Literal.number(0), 421 **( 422 {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL} 423 if expr_type is exp.RegexpExtract 424 else {} 425 ), 426 ) 427 428 return _builder 429 430 431def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 432 # Other dialects don't support all of the following parameters, so we need to 433 # generate default values as necessary to ensure the transpilation is correct 434 group = expression.args.get("group") 435 436 # To avoid generating all these default values, we set group to None if 437 # it's 0 (also default value) which doesn't trigger the following chain 438 if group and group.name == "0": 439 group = None 440 441 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 442 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 443 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 444 445 return self.func( 446 "REGEXP_SUBSTR" 
def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    """Generate REGEXP_SUBSTR (or REGEXP_EXTRACT_ALL), materializing default values
    for any earlier optional parameters that a later explicit parameter requires."""
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also default value) which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    # Each parameter only needs a default when a later positional parameter is set
    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    """Generate TRANSFORM(JSON_EXTRACT(...), x -> ...) where each element is cast to
    VARCHAR (JSONValueArray) or re-parsed as JSON (JSONExtractArray)."""
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    """Qualify bare column references that come from UNNEST sources so that
    generated Snowflake SQL resolves them. Returns the original (copied) expression
    untouched when a usable unnest identifier cannot be determined."""
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression


def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    """Rewrite `alias.col` references into bracket lookups (`alias['col']`) for
    column-only UNNEST aliases, preserving projection names with explicit aliases."""
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression
def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    """Build TimestampFromParts with support for both syntaxes:
    1. TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second [, nanosecond] [, time_zone])
    2. TIMESTAMP_FROM_PARTS(date_expr, time_expr) - Snowflake specific
    """
    if len(args) != 2:
        # Full component-wise form; delegate to the standard arg-list builder
        return exp.TimestampFromParts.from_arg_list(args)

    # Two-argument Snowflake form: a date expression plus a time expression
    date_expr, time_expr = args
    return exp.TimestampFromParts(this=date_expr, expression=time_expr)


def _build_round(args: t.List) -> exp.Round:
    """
    Build Round expression, unwrapping Snowflake's named parameters.

    Maps EXPR => this, SCALE => decimals, ROUNDING_MODE => truncate.

    Note: Snowflake does not support mixing named and positional arguments.
    Arguments are either all named or all positional.
    """
    named_to_key = {"EXPR": "this", "SCALE": "decimals", "ROUNDING_MODE": "truncate"}
    # Positional arguments are consumed in this fixed order; extras are dropped
    remaining_slots = iter(("this", "decimals", "truncate"))
    collected: t.Dict[str, t.Any] = {}

    for arg in args:
        if isinstance(arg, exp.Kwarg):
            target = named_to_key.get(arg.this.name.upper())
            if target:
                collected[target] = arg.expression
        else:
            slot = next(remaining_slots, None)
            if slot is not None:
                collected[slot] = arg

    rounded = exp.Round(**collected)
    rounded.set("casts_non_integer_decimals", True)
    return rounded
648 """ 649 kwarg_map = {"ROWCOUNT": "rowcount", "TIMELIMIT": "time_limit"} 650 gen_args = {} 651 652 for arg in args: 653 if isinstance(arg, exp.Kwarg): 654 key = arg.this.name.upper() 655 gen_key = kwarg_map.get(key) 656 if gen_key: 657 gen_args[gen_key] = arg.expression 658 659 return exp.Generator(**gen_args) 660 661 662def _build_try_to_number(args: t.List[exp.Expression]) -> exp.Expression: 663 return exp.ToNumber( 664 this=seq_get(args, 0), 665 format=seq_get(args, 1), 666 precision=seq_get(args, 2), 667 scale=seq_get(args, 3), 668 safe=True, 669 ) 670 671 672class Snowflake(Dialect): 673 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 674 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 675 NULL_ORDERING = "nulls_are_large" 676 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 677 SUPPORTS_USER_DEFINED_TYPES = False 678 SUPPORTS_SEMI_ANTI_JOIN = False 679 PREFER_CTE_ALIAS_COLUMN = True 680 TABLESAMPLE_SIZE_IS_PERCENT = True 681 COPY_PARAMS_ARE_CSV = False 682 ARRAY_AGG_INCLUDES_NULLS = None 683 ARRAY_FUNCS_PROPAGATES_NULLS = True 684 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 685 TRY_CAST_REQUIRES_STRING = True 686 SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True 687 LEAST_GREATEST_IGNORES_NULLS = False 688 689 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 690 691 # https://docs.snowflake.com/en/en/sql-reference/functions/initcap 692 INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>' 693 694 INVERSE_TIME_MAPPING = { 695 "T": "T", # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O' 696 } 697 698 TIME_MAPPING = { 699 "YYYY": "%Y", 700 "yyyy": "%Y", 701 "YY": "%y", 702 "yy": "%y", 703 "MMMM": "%B", 704 "mmmm": "%B", 705 "MON": "%b", 706 "mon": "%b", 707 "MM": "%m", 708 "mm": "%m", 709 "DD": "%d", 710 "dd": "%-d", 711 "DY": "%a", 712 "dy": "%w", 713 "HH24": "%H", 714 "hh24": "%H", 715 "HH12": "%I", 716 
"hh12": "%I", 717 "MI": "%M", 718 "mi": "%M", 719 "SS": "%S", 720 "ss": "%S", 721 "FF": "%f_nine", # %f_ internal representation with precision specified 722 "ff": "%f_nine", 723 "FF0": "%f_zero", 724 "ff0": "%f_zero", 725 "FF1": "%f_one", 726 "ff1": "%f_one", 727 "FF2": "%f_two", 728 "ff2": "%f_two", 729 "FF3": "%f_three", 730 "ff3": "%f_three", 731 "FF4": "%f_four", 732 "ff4": "%f_four", 733 "FF5": "%f_five", 734 "ff5": "%f_five", 735 "FF6": "%f", 736 "ff6": "%f", 737 "FF7": "%f_seven", 738 "ff7": "%f_seven", 739 "FF8": "%f_eight", 740 "ff8": "%f_eight", 741 "FF9": "%f_nine", 742 "ff9": "%f_nine", 743 "TZHTZM": "%z", 744 "tzhtzm": "%z", 745 "TZH:TZM": "%:z", # internal representation for ±HH:MM 746 "tzh:tzm": "%:z", 747 "TZH": "%-z", # internal representation ±HH 748 "tzh": "%-z", 749 '"T"': "T", # remove the optional double quotes around the separator between the date and time 750 # Seems like Snowflake treats AM/PM in the format string as equivalent, 751 # only the time (stamp) value's AM/PM affects the output 752 "AM": "%p", 753 "am": "%p", 754 "PM": "%p", 755 "pm": "%p", 756 } 757 758 DATE_PART_MAPPING = { 759 **Dialect.DATE_PART_MAPPING, 760 "ISOWEEK": "WEEKISO", 761 # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve 762 # EPOCH_SECOND as a distinct value for two reasons: 763 # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE 764 # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND 765 # to BIGINT to match Snowflake's integer behavior 766 # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized 767 # to EXTRACT(EPOCH FROM ts) and lose the integer semantics. 768 "EPOCH_SECOND": "EPOCH_SECOND", 769 "EPOCH_SECONDS": "EPOCH_SECOND", 770 } 771 772 PSEUDOCOLUMNS = {"LEVEL"} 773 774 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 775 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 776 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 777 return super().can_quote(identifier, identify) and not ( 778 isinstance(identifier.parent, exp.Table) 779 and not identifier.quoted 780 and identifier.name.lower() == "dual" 781 ) 782 783 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 784 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 785 SINGLE_TOKENS.pop("$") 786 787 class Parser(parser.Parser): 788 IDENTIFY_PIVOT_STRINGS = True 789 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 790 COLON_IS_VARIANT_EXTRACT = True 791 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 792 793 ID_VAR_TOKENS = { 794 *parser.Parser.ID_VAR_TOKENS, 795 TokenType.EXCEPT, 796 TokenType.MATCH_CONDITION, 797 } 798 799 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 800 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 801 802 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 803 804 NO_PAREN_FUNCTIONS = { 805 **parser.Parser.NO_PAREN_FUNCTIONS, 806 TokenType.CURRENT_TIME: exp.Localtime, 807 } 808 809 FUNCTIONS = { 810 **parser.Parser.FUNCTIONS, 811 "ADD_MONTHS": lambda args: exp.AddMonths( 812 this=seq_get(args, 0), 813 expression=seq_get(args, 1), 814 preserve_end_of_month=True, 815 ), 816 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 817 "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)), 818 "APPROX_TOP_K": _build_approx_top_k, 819 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 820 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 821 this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False 822 ), 823 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 824 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 825 start=seq_get(args, 0), 826 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 827 step=seq_get(args, 2), 828 ), 829 "ARRAY_SORT": 
            exp.SortArray.from_arg_list,
            "ARRAY_FLATTEN": exp.Flatten.from_arg_list,
            # Snowflake accepts both spellings (BITAND / BIT_AND, etc.) for the
            # scalar bitwise functions; each pair maps to the same expression.
            "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
            "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
            "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            # Aggregate bitwise variants; all four spellings are accepted.
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list,
            "BOOLAND": lambda args: exp.Booland(
                this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True
            ),
            "BOOLOR": lambda args: exp.Boolor(
                this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True
            ),
            "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True),
            "BOOLXOR": lambda args: exp.Xor(
                this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True
            ),
            "CORR": lambda args: exp.Corr(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                null_on_zero_variance=True,
            ),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATEFROMPARTS": _build_date_from_parts,
            "DATE_FROM_PARTS": _build_date_from_parts,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True),
            "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
            "DIV0": _build_if_from_div0,
            "DIV0NULL": _build_if_from_div0null,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GENERATOR": _build_generator,
            "GET": exp.GetExtract.from_arg_list,
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest(
                this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
            ),
            "LEAST_IGNORE_NULLS": lambda args: exp.Least(
                this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True),
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OBJECT_KEYS": exp.JSONKeys.from_arg_list,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "PARSE_URL": lambda args: exp.ParseUrl(
                this=seq_get(args, 0), permissive=seq_get(args, 1)
            ),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "ROUND": _build_round,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "STDDEV_SAMP": exp.Stddev.from_arg_list,
            "STRTOK": _build_strtok,
            "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIME_FROM_PARTS": lambda args: exp.TimeFromParts(
                hour=seq_get(args, 0),
                min=seq_get(args, 1),
                sec=seq_get(args, 2),
                nano=seq_get(args, 3),
                overflow=True,
            ),
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts,
            # TRY_* variants build the same expressions with safe=True so the
            # generator can round-trip the TRY_ prefix.
            "TRY_DECRYPT": lambda args: exp.Decrypt(
                this=seq_get(args, 0),
                passphrase=seq_get(args, 1),
                aad=seq_get(args, 2),
                encryption_method=seq_get(args, 3),
                safe=True,
            ),
            "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw(
                this=seq_get(args, 0),
                key=seq_get(args, 1),
                iv=seq_get(args, 2),
                aad=seq_get(args, 3),
                encryption_method=seq_get(args, 4),
                aead=seq_get(args, 5),
                safe=True,
            ),
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_BINARY": lambda args: exp.ToBinary(
                this=seq_get(args, 0), format=seq_get(args, 1), safe=True
            ),
            "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            **dict.fromkeys(
                ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number
            ),
            "TRY_TO_DOUBLE": lambda args: exp.ToDouble(
                this=seq_get(args, 0), format=seq_get(args, 1), safe=True
            ),
            "TRY_TO_FILE": lambda args: exp.ToFile(
                this=seq_get(args, 0), path=seq_get(args, 1), safe=True
            ),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TRY_TO_TIMESTAMP_LTZ": _build_datetime(
                "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True
            ),
            "TRY_TO_TIMESTAMP_NTZ": _build_datetime(
                "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True
            ),
            "TRY_TO_TIMESTAMP_TZ": _build_datetime(
                "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            **dict.fromkeys(
                ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"),
                lambda args: exp.ToNumber(
                    this=seq_get(args, 0),
                    format=seq_get(args, 1),
                    precision=seq_get(args, 2),
                    scale=seq_get(args, 3),
                ),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list,
            "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list,
            "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
            "LIKE": build_like(exp.Like),
            "ILIKE": build_like(exp.ILike),
            "SEARCH": _build_search,
            "SKEW": exp.Skewness.from_arg_list,
            "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
            "WEEKISO": exp.WeekOfYear.from_arg_list,
            "WEEKOFYEAR": exp.Week.from_arg_list,
        }
        # Snowflake has no PREDICT function, so drop the inherited mapping.
        FUNCTIONS.pop("PREDICT")

        # Functions that need custom parsing logic beyond a plain argument list.
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "DIRECTORY": lambda self: self._parse_directory(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        # SHOW <object kind> variants; TERSE entries map to the same parser.
        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        # Metadata columns produced by Snowflake's FLATTEN table function.
        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_directory(self) -> exp.DirectoryStage:
            """Parse the argument of DIRECTORY(...), unwrapping a Table into its name."""
            table = self._parse_table_parts()

            if isinstance(table, exp.Table):
                table = table.this

            return self.expression(exp.DirectoryStage, this=table)

        def _parse_use(self) -> exp.Use:
            # USE SECONDARY ROLES ALL | NONE | <role> [, ...]
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a SnowFlake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            """Parse a parenthesized, comma-separated TAG property list."""
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            # The WITH keyword is optional before MASKING/PROJECTION/TAG, so step
            # back one token when it was not the trigger.
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr)
            expression = (
                self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise()
            )
            return self.expression(
                exp.Extract, this=map_date_part(this, self.dialect), expression=expression
            )

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            # LATERAL FLATTEN(...) implicitly exposes the FLATTEN metadata columns,
            # so attach them to the (possibly synthesized) table alias.
            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                # Staged file reference: optionally followed by
                # (FILE_FORMAT => ..., PATTERN => ...) options.
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            # TABLE(...) parses as a Table wrapping TableFromRows; hoist the
            # TableFromRows node and move the Table-level args onto it.
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            # IDENTIFIER(<name or string>) is preserved as an Anonymous function call.
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                terse=terse,
                this=this,
                history=history,
                like=like,
                scope=scope,
                scope_kind=scope_kind,
                starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                limit=self._parse_limit(),
                from_=self._parse_string() if self._match(TokenType.FROM) else None,
                privileges=self._match_text_seq("WITH", "PRIVILEGES")
                and self._parse_csv(lambda: self._parse_var(any_token=True,
                upper=True)),
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            # PUT requires a quoted source file; otherwise fall back to a raw command.
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            # A lambda parameter may carry an inline type, e.g. `x INT -> ...`.
            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inlineFK, the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # outoflineFK, explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")):
                    keyword = self._prev.text.lower()
                    kwargs[keyword] = self._parse_csv(self._parse_disjunction)
                elif self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()
                else:
                    self.raise_error("Expecting ) or encountered unexpected keyword")
                    break

            return self.expression(exp.SemanticView, **kwargs)

        def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
            set = super()._parse_set(unset=unset, tag=tag)

            # Snowflake SET statements assign session variables; mark each item.
            if isinstance(set, exp.Set):
                for expr in set.expressions:
                    if isinstance(expr, exp.SetItem):
                        expr.set("kind", "VARIABLE")
            return set

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed as a statement here, not a raw command.
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False
        DIRECTED_JOINS = True

        # Expression -> SQL rendering overrides for Snowflake.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
            exp.ArrayConcat: array_concat_sql("ARRAY_CAT"),
            exp.ArrayAppend: array_append_sql("ARRAY_APPEND"),
            exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"),
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE")
            if e.args.get("sysdate")
            else self.function_fallback_sql(e),
            exp.Localtime: lambda self, e: self.func("CURRENT_TIME", e.this)
            if e.this
            else "CURRENT_TIME",
            exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this)
            if e.this
            else "CURRENT_TIMESTAMP",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Decrypt: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT",
                e.this,
                e.args.get("passphrase"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
            ),
            exp.DecryptRaw: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW",
                e.this,
                e.args.get("key"),
                e.args.get("iv"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
                e.args.get("aead"),
            ),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONKeys: rename_func("OBJECT_KEYS"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            # safe=True round-trips the TRY_ prefix set by the parser.
            exp.ParseJSON: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
            ),
            exp.ToBinary: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
            ),
            exp.ToBoolean: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
            ),
            exp.ToDouble: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
            ),
            exp.ToFile: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
            ),
            exp.ToNumber: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
                e.this,
                e.args.get("format"),
                e.args.get("precision"),
                e.args.get("scale"),
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1_BINARY"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.Skewness: rename_func("SKEW"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimeSlice: lambda self, e: self.func(
                "TIME_SLICE",
                e.this,
                e.expression,
                unit_to_str(e),
                e.args.get("kind"),
            ),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Booland: rename_func("BOOLAND"),
            exp.Boolor: rename_func("BOOLOR"),
            exp.WeekOfYear: rename_func("WEEKISO"),
            exp.YearOfWeek: rename_func("YEAROFWEEK"),
            exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
            exp.Flatten: rename_func("ARRAY_FLATTEN"),
            exp.ArrayConcatAgg: lambda self, e: self.func(
                "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
            ),
            exp.SHA2Digest: lambda self, e: self.func(
                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expressions that cannot appear inside a VALUES (...) table literal;
        # values_sql falls back to a UNION-of-SELECTs shape when one is found
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        # Render table properties space-separated, without enclosing parentheses.
        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        # Disable the VALUES-as-table shape when it contains an unsupported expression.
        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        # TO_NUMBER accepts optional format, precision and scale arguments, in that order.
        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        # TIMESTAMP_FROM_PARTS has no milliseconds argument, so milliseconds are
        # converted to nanoseconds (x 1_000_000) and passed via the "nano" slot.
        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        # Geospatial casts are spelled as conversion functions in Snowflake.
        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            # Annotate on demand so we can check the value's type below
            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            # Otherwise fall back to a plain CAST, which accepts any input type
            return self.cast_sql(expression)

        # Single-argument LOG means natural logarithm (LN) in Snowflake.
        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def greatest_sql(self, expression: exp.Greatest) -> str:
            name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST"
            return self.func(name, expression.this, *expression.expressions)

        def least_sql(self, expression: exp.Least) -> str:
            name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST"
            return self.func(name, expression.this, *expression.expressions)

        # GENERATOR(ROWCOUNT => n, TIMELIMIT => s) takes keyword-style arguments.
        def generator_sql(self, expression: exp.Generator) -> str:
            args = []
            rowcount = expression.args.get("rowcount")
            time_limit = expression.args.get("time_limit")

            if rowcount:
                args.append(exp.Kwarg(this=exp.var("ROWCOUNT"), expression=rowcount))
            if time_limit:
                args.append(exp.Kwarg(this=exp.var("TIMELIMIT"), expression=time_limit))

            return self.func("GENERATOR", *args)

        # UNNEST is rendered as (TABLE()FLATTEN(INPUT => ...)) with the fixed
        # six-column FLATTEN output (seq, key, path, index, value, this).
        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            # Inside a LATERAL, Snowflake expects bare FLATTEN(...); elsewhere it
            # must be wrapped in TABLE(...)
            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            # When the UNNEST is not already a table source, emit a scalar
            # "<value> FROM ..." projection shape instead
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

        # Renders SHOW [TERSE] <object> with its optional filtering clauses.
        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from_")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            # NOTE(review): the `if kind_value` guard below can never be false,
            # since the `or "TABLE"` fallback guarantees a truthy value
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        # Identity columns become AUTOINCREMENT [START n] [INCREMENT n] [[NO]ORDER].
        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""

            # "order" is tri-state: True -> ORDER, False -> NOORDER, None -> omitted
            order = expression.args.get("order")
            if order is not None:
                order_clause = " ORDER" if order else " NOORDER"
            else:
                order_clause = ""

            return f"AUTOINCREMENT{start}{increment}{order_clause}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        # Structs are rendered as OBJECT_CONSTRUCT('key', value, ...); unnamed
        # fields get positional '_<i>' keys.
        def struct_sql(self, expression: exp.Struct) -> str:
            if len(expression.expressions) == 1:
                arg = expression.expressions[0]
                if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
                    # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
                    return f"{{{self.sql(expression.expressions[0])}}}"

            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        # ALTER ... SET with optional STAGE_FILE_FORMAT / STAGE_COPY_OPTIONS / TAG clauses.
        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        # Picks the TO_TIMESTAMP* variant matching the target timestamp type,
        # with a TRY_ prefix for the non-raising form.
        def strtotime_sql(self, expression: exp.StrToTime):
            # target_type is stored as a DataType instance
            target_type = expression.args.get("target_type")

            # Get the type enum from DataType instance or from type annotation
            if isinstance(target_type, exp.DataType):
                type_enum = target_type.this
            elif expression.type:
                type_enum = expression.type.this
            else:
                type_enum = exp.DataType.Type.TIMESTAMP

            func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")

            return self.func(
                f"{'TRY_' if expression.args.get('safe') else ''}{func_name}",
                expression.this,
                self.format_time(expression),
            )

        # Subtraction is rewritten as addition of the negated amount.
        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ so
            # for these cases we PARSE_JSON preemptively
            if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
                "requires_json"
            ):
                this = exp.ParseJSON(this=this)

            return self.func(
                "GET_PATH",
                this,
                expression.expression,
            )

        # TO_CHAR formatting; string inputs are cast to TIMESTAMP first so the
        # format string applies to a datetime value.
        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if this.is_string:
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        # DATE_SUB -> DATEADD with a negated amount (in place, via replace).
        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        # Snowflake requires a LIMIT when OFFSET is present; inject LIMIT NULL.
        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            is_materialized = expression.find(exp.MaterializedProperty)
            copy_grants_property = expression.find(exp.CopyGrantsProperty)

            if expression.kind == "VIEW" and is_materialized and copy_grants_property:
                # For materialized views, COPY GRANTS is located *before* the columns list
                # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
                # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
                post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
                post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

                this_name = self.sql(expression.this, "this")
                copy_grants = self.sql(copy_grants_property)
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

            return super().createable_sql(expression, locations)

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            this = expression.this

            # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
            # and add it later as part of the WITHIN GROUP clause
            order = this if isinstance(this, exp.Order) else None
            if order:
                expression.set("this", order.this.pop())

            expr_sql = super().arrayagg_sql(expression)

            if order:
                expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

            return expr_sql

        def array_sql(self, expression: exp.Array) -> str:
            expressions = expression.expressions

            first_expr = seq_get(expressions, 0)
            if isinstance(first_expr, exp.Select):
                # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
                if first_expr.text("kind").upper() == "STRUCT":
                    object_construct_args = []
                    for expr in first_expr.expressions:
                        # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                        # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                        name = expr.this if isinstance(expr, exp.Alias) else expr

                        object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                    array_agg = exp.ArrayAgg(
                        this=_build_object_construct(args=object_construct_args)
                    )

                    first_expr.set("kind", None)
                    first_expr.set("expressions", [array_agg])

                    return self.sql(first_expr.subquery())

            return inline_array_sql(self, expression)

        # CURRENT_DATE with an explicit timezone becomes
        # CAST(CONVERT_TIMEZONE(<zone>, CURRENT_TIMESTAMP()) AS DATE).
        def currentdate_sql(self, expression: exp.CurrentDate) -> str:
            zone = self.sql(expression, "this")
            if not zone:
                return super().currentdate_sql(expression)

            expr = exp.Cast(
                this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
                to=exp.DataType(this=exp.DataType.Type.DATE),
            )
            return self.sql(expr)

        # STRUCT member access is spelled with colon (variant) notation at the top level.
        def dot_sql(self, expression: exp.Dot) -> str:
            this = expression.this

            # Annotate on demand so the STRUCT check below has type information
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
                # Generate colon notation for the top level STRUCT
                return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

            return super().dot_sql(expression)

        # Model attribute access uses the '!' operator (e.g. model!forecast(...)).
        def modelattribute_sql(self, expression: exp.ModelAttribute) -> str:
            return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}"

        # FORMAT('%s', x) degenerates to TO_CHAR(x); anything else falls back to
        # the generic function renderer.
        def format_sql(self, expression: exp.Format) -> str:
            if expression.name.lower() == "%s" and len(expression.expressions) == 1:
                return self.func("TO_CHAR", expression.expressions[0])

            return self.function_fallback_sql(expression)

        # SPLIT_PART with Snowflake's defaults filled in: delimiter ' ', part_index 1.
        def splitpart_sql(self, expression: exp.SplitPart) -> str:
            # Set part_index to 1 if missing
            if not expression.args.get("delimiter"):
                expression.set("delimiter", exp.Literal.string(" "))

            if not expression.args.get("part_index"):
                expression.set("part_index", exp.Literal.number(1))

            return rename_func("SPLIT_PART")(self, expression)

        # UNIFORM(min, max, gen) always needs a generator expression; seed-based
        # and generator-less inputs (e.g. from Databricks) get RANDOM([seed]).
        def uniform_sql(self, expression: exp.Uniform) -> str:
            gen = expression.args.get("gen")
            seed = expression.args.get("seed")

            # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed)
            if seed:
                gen = exp.Rand(this=seed)

            # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM()
            if not gen:
                gen = exp.Rand()

            return self.func("UNIFORM", expression.this, expression.expression, gen)
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ARRAY_FUNCS_PROPAGATES_NULLS = True
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        "T": "T",  # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'
    }

    # Snowflake format tokens -> internal strftime-like tokens (case-insensitive pairs)
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_ internal representation with precision specified
        "ff": "%f_nine",
        "FF0": "%f_zero",
        "ff0": "%f_zero",
        "FF1": "%f_one",
        "ff1": "%f_one",
        "FF2": "%f_two",
        "ff2": "%f_two",
        "FF3": "%f_three",
        "ff3": "%f_three",
        "FF4": "%f_four",
        "ff4": "%f_four",
        "FF5": "%f_five",
        "ff5": "%f_five",
        "FF6": "%f",
        "ff6": "%f",
        "FF7": "%f_seven",
        "ff7": "%f_seven",
        "FF8": "%f_eight",
        "ff8": "%f_eight",
"FF9": "%f_nine", 743 "ff9": "%f_nine", 744 "TZHTZM": "%z", 745 "tzhtzm": "%z", 746 "TZH:TZM": "%:z", # internal representation for ±HH:MM 747 "tzh:tzm": "%:z", 748 "TZH": "%-z", # internal representation ±HH 749 "tzh": "%-z", 750 '"T"': "T", # remove the optional double quotes around the separator between the date and time 751 # Seems like Snowflake treats AM/PM in the format string as equivalent, 752 # only the time (stamp) value's AM/PM affects the output 753 "AM": "%p", 754 "am": "%p", 755 "PM": "%p", 756 "pm": "%p", 757 } 758 759 DATE_PART_MAPPING = { 760 **Dialect.DATE_PART_MAPPING, 761 "ISOWEEK": "WEEKISO", 762 # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve 763 # EPOCH_SECOND as a distinct value for two reasons: 764 # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE 765 # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND 766 # to BIGINT to match Snowflake's integer behavior 767 # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized 768 # to EXTRACT(EPOCH FROM ts) and lose the integer semantics. 769 "EPOCH_SECOND": "EPOCH_SECOND", 770 "EPOCH_SECONDS": "EPOCH_SECOND", 771 } 772 773 PSEUDOCOLUMNS = {"LEVEL"} 774 775 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 776 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 777 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 778 return super().can_quote(identifier, identify) and not ( 779 isinstance(identifier.parent, exp.Table) 780 and not identifier.quoted 781 and identifier.name.lower() == "dual" 782 ) 783 784 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 785 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 786 SINGLE_TOKENS.pop("$") 787 788 class Parser(parser.Parser): 789 IDENTIFY_PIVOT_STRINGS = True 790 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 791 COLON_IS_VARIANT_EXTRACT = True 792 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 793 794 ID_VAR_TOKENS = { 795 *parser.Parser.ID_VAR_TOKENS, 796 TokenType.EXCEPT, 797 TokenType.MATCH_CONDITION, 798 } 799 800 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 801 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 802 803 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 804 805 NO_PAREN_FUNCTIONS = { 806 **parser.Parser.NO_PAREN_FUNCTIONS, 807 TokenType.CURRENT_TIME: exp.Localtime, 808 } 809 810 FUNCTIONS = { 811 **parser.Parser.FUNCTIONS, 812 "ADD_MONTHS": lambda args: exp.AddMonths( 813 this=seq_get(args, 0), 814 expression=seq_get(args, 1), 815 preserve_end_of_month=True, 816 ), 817 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 818 "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)), 819 "APPROX_TOP_K": _build_approx_top_k, 820 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 821 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 822 this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False 823 ), 824 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 825 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 826 start=seq_get(args, 0), 827 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 828 step=seq_get(args, 2), 829 ), 830 "ARRAY_SORT": 
exp.SortArray.from_arg_list, 831 "ARRAY_FLATTEN": exp.Flatten.from_arg_list, 832 "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 833 "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 834 "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 835 "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 836 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 837 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 838 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 839 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 840 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 841 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 842 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 843 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 844 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 845 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 846 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 847 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 848 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 849 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 850 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 851 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 852 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 853 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 854 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 855 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 856 "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list, 857 "BOOLAND": lambda args: exp.Booland( 858 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 859 ), 860 "BOOLOR": lambda args: exp.Boolor( 861 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 862 ), 863 "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True), 864 "BOOLXOR": lambda args: exp.Xor( 865 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 866 ), 867 "CORR": lambda args: exp.Corr( 868 
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                null_on_zero_variance=True,
            ),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATEFROMPARTS": _build_date_from_parts,
            "DATE_FROM_PARTS": _build_date_from_parts,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True),
            "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
            "DIV0": _build_if_from_div0,
            "DIV0NULL": _build_if_from_div0null,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GENERATOR": _build_generator,
            "GET": exp.GetExtract.from_arg_list,
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            # GET_PATH is the inverse of the generator's JSONExtract -> GET_PATH mapping
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest(
                this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
            ),
            "LEAST_IGNORE_NULLS": lambda args: exp.Least(
                this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True),
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            # LEN/LENGTH count bytes for binary inputs, hence binary=True
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OBJECT_KEYS": exp.JSONKeys.from_arg_list,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "PARSE_URL": lambda args: exp.ParseUrl(
                this=seq_get(args, 0), permissive=seq_get(args, 1)
            ),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "ROUND": _build_round,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "STDDEV_SAMP": exp.Stddev.from_arg_list,
            "STRTOK": _build_strtok,
            "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIME_FROM_PARTS": lambda args: exp.TimeFromParts(
                hour=seq_get(args, 0),
                min=seq_get(args, 1),
                sec=seq_get(args, 2),
                nano=seq_get(args, 3),
                overflow=True,
            ),
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts,
            # TRY_* builders set safe=True so the generator re-emits the TRY_ prefix
            "TRY_DECRYPT": lambda args: exp.Decrypt(
                this=seq_get(args, 0),
                passphrase=seq_get(args, 1),
                aad=seq_get(args, 2),
                encryption_method=seq_get(args, 3),
                safe=True,
            ),
"TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw( 959 this=seq_get(args, 0), 960 key=seq_get(args, 1), 961 iv=seq_get(args, 2), 962 aad=seq_get(args, 3), 963 encryption_method=seq_get(args, 4), 964 aead=seq_get(args, 5), 965 safe=True, 966 ), 967 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 968 "TRY_TO_BINARY": lambda args: exp.ToBinary( 969 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 970 ), 971 "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True), 972 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 973 **dict.fromkeys( 974 ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number 975 ), 976 "TRY_TO_DOUBLE": lambda args: exp.ToDouble( 977 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 978 ), 979 "TRY_TO_FILE": lambda args: exp.ToFile( 980 this=seq_get(args, 0), path=seq_get(args, 1), safe=True 981 ), 982 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 983 "TRY_TO_TIMESTAMP": _build_datetime( 984 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 985 ), 986 "TRY_TO_TIMESTAMP_LTZ": _build_datetime( 987 "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True 988 ), 989 "TRY_TO_TIMESTAMP_NTZ": _build_datetime( 990 "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True 991 ), 992 "TRY_TO_TIMESTAMP_TZ": _build_datetime( 993 "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True 994 ), 995 "TO_CHAR": build_timetostr_or_tochar, 996 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 997 **dict.fromkeys( 998 ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"), 999 lambda args: exp.ToNumber( 1000 this=seq_get(args, 0), 1001 format=seq_get(args, 1), 1002 precision=seq_get(args, 2), 1003 scale=seq_get(args, 3), 1004 ), 1005 ), 1006 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 1007 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", 
exp.DataType.Type.TIMESTAMP), 1008 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 1009 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ), 1010 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 1011 "TO_VARCHAR": build_timetostr_or_tochar, 1012 "TO_JSON": exp.JSONFormat.from_arg_list, 1013 "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list, 1014 "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list, 1015 "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list, 1016 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 1017 "ZEROIFNULL": _build_if_from_zeroifnull, 1018 "LIKE": build_like(exp.Like), 1019 "ILIKE": build_like(exp.ILike), 1020 "SEARCH": _build_search, 1021 "SKEW": exp.Skewness.from_arg_list, 1022 "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 1023 "WEEKISO": exp.WeekOfYear.from_arg_list, 1024 "WEEKOFYEAR": exp.Week.from_arg_list, 1025 } 1026 FUNCTIONS.pop("PREDICT") 1027 1028 FUNCTION_PARSERS = { 1029 **parser.Parser.FUNCTION_PARSERS, 1030 "DATE_PART": lambda self: self._parse_date_part(), 1031 "DIRECTORY": lambda self: self._parse_directory(), 1032 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 1033 "LISTAGG": lambda self: self._parse_string_agg(), 1034 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 1035 } 1036 FUNCTION_PARSERS.pop("TRIM") 1037 1038 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 1039 1040 ALTER_PARSERS = { 1041 **parser.Parser.ALTER_PARSERS, 1042 "SESSION": lambda self: self._parse_alter_session(), 1043 "UNSET": lambda self: self.expression( 1044 exp.Set, 1045 tag=self._match_text_seq("TAG"), 1046 expressions=self._parse_csv(self._parse_id_var), 1047 unset=True, 1048 ), 1049 } 1050 1051 STATEMENT_PARSERS = { 1052 **parser.Parser.STATEMENT_PARSERS, 1053 TokenType.GET: lambda self: self._parse_get(), 1054 TokenType.PUT: lambda self: self._parse_put(), 1055 
TokenType.SHOW: lambda self: self._parse_show(), 1056 } 1057 1058 PROPERTY_PARSERS = { 1059 **parser.Parser.PROPERTY_PARSERS, 1060 "CREDENTIALS": lambda self: self._parse_credentials_property(), 1061 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 1062 "LOCATION": lambda self: self._parse_location_property(), 1063 "TAG": lambda self: self._parse_tag(), 1064 "USING": lambda self: self._match_text_seq("TEMPLATE") 1065 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 1066 } 1067 1068 TYPE_CONVERTERS = { 1069 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 1070 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 1071 } 1072 1073 SHOW_PARSERS = { 1074 "DATABASES": _show_parser("DATABASES"), 1075 "TERSE DATABASES": _show_parser("DATABASES"), 1076 "SCHEMAS": _show_parser("SCHEMAS"), 1077 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 1078 "OBJECTS": _show_parser("OBJECTS"), 1079 "TERSE OBJECTS": _show_parser("OBJECTS"), 1080 "TABLES": _show_parser("TABLES"), 1081 "TERSE TABLES": _show_parser("TABLES"), 1082 "VIEWS": _show_parser("VIEWS"), 1083 "TERSE VIEWS": _show_parser("VIEWS"), 1084 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1085 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1086 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1087 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1088 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1089 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1090 "SEQUENCES": _show_parser("SEQUENCES"), 1091 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 1092 "STAGES": _show_parser("STAGES"), 1093 "COLUMNS": _show_parser("COLUMNS"), 1094 "USERS": _show_parser("USERS"), 1095 "TERSE USERS": _show_parser("USERS"), 1096 "FILE FORMATS": _show_parser("FILE FORMATS"), 1097 "FUNCTIONS": _show_parser("FUNCTIONS"), 1098 "PROCEDURES": _show_parser("PROCEDURES"), 1099 "WAREHOUSES": _show_parser("WAREHOUSES"), 1100 } 1101 1102 CONSTRAINT_PARSERS = { 1103 
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        # Tokens allowed inside a staged file path (e.g. @stage/dir/file.csv).
        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        # Default output columns produced by FLATTEN.
        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        # SHOW kinds whose unqualified IN <name> scope defaults to a schema.
        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        # CREATE kinds whose target is a bare identifier, not a table reference.
        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                # Lambda parameters may carry type annotations (parsed as Casts);
                # keep only the bare identifiers in the parameter list.
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            # model!attribute access syntax
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_directory(self) -> exp.DirectoryStage:
            """Parse the argument of DIRECTORY(...), unwrapping a bare table reference."""
            table = self._parse_table_parts()

            if isinstance(table, exp.Table):
                table = table.this

            return self.expression(exp.DirectoryStage, this=table)

        def _parse_use(self) -> exp.Use:
            """Parse USE, including USE SECONDARY ROLES ALL | NONE | <role, ...>."""
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a SnowFlake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            """Parse a parenthesized, comma-separated TAG (...) property list."""
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            # The WITH keyword is optional; back up if we consumed something else.
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr)
            expression = (
                self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise()
            )
            return self.expression(
                exp.Extract, this=map_date_part(this, self.dialect), expression=expression
            )

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            # LATERAL FLATTEN(...) gets the fixed FLATTEN output columns as its
            # alias columns when the user didn't name them explicitly.
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                # Optional (FILE_FORMAT => ..., PATTERN => ...) options after a stage.
                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                # Hoist the TableFromRows node out of the wrapping Table, copying
                # over the table-level args (alias etc.) onto it.
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            # IDENTIFIER('name') is preserved as an anonymous IDENTIFIER call so
            # it round-trips instead of being resolved at parse time.
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                terse=terse,
                this=this,
                history=history,
                like=like,
                scope=scope,
                scope_kind=scope_kind,
                starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                limit=self._parse_limit(),
                from_=self._parse_string() if self._match(TokenType.FROM) else None,
                privileges=self._match_text_seq("WITH", "PRIVILEGES")
                and self._parse_csv(lambda: self._parse_var(any_token=True,
                upper=True)),
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            """Parse PUT; fall back to a raw command when the source is not a quoted path."""
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            """Consume a stage/file path verbatim and return it as a Var."""
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            # Optional type annotation after the parameter name, kept as a Cast.
            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inlineFK, the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # outoflineFK, explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            # Either a wrapped option list (TYPE = CSV ...) or a named format.
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            """Parse SEMANTIC_VIEW(<name> [DIMENSIONS ...] [METRICS ...] [FACTS ...] [WHERE ...])."""
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")):
                    keyword = self._prev.text.lower()
                    kwargs[keyword] = self._parse_csv(self._parse_disjunction)
                elif self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()
                else:
                    self.raise_error("Expecting ) or encountered unexpected keyword")
                    break

            return self.expression(exp.SemanticView, **kwargs)

        def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
            set = super()._parse_set(unset=unset, tag=tag)

            if isinstance(set,
exp.Set): 1501 for expr in set.expressions: 1502 if isinstance(expr, exp.SetItem): 1503 expr.set("kind", "VARIABLE") 1504 return set 1505 1506 class Tokenizer(tokens.Tokenizer): 1507 STRING_ESCAPES = ["\\", "'"] 1508 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1509 RAW_STRINGS = ["$$"] 1510 COMMENTS = ["--", "//", ("/*", "*/")] 1511 NESTED_COMMENTS = False 1512 1513 KEYWORDS = { 1514 **tokens.Tokenizer.KEYWORDS, 1515 "BYTEINT": TokenType.INT, 1516 "FILE://": TokenType.URI_START, 1517 "FILE FORMAT": TokenType.FILE_FORMAT, 1518 "GET": TokenType.GET, 1519 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1520 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1521 "MINUS": TokenType.EXCEPT, 1522 "NCHAR VARYING": TokenType.VARCHAR, 1523 "PUT": TokenType.PUT, 1524 "REMOVE": TokenType.COMMAND, 1525 "RM": TokenType.COMMAND, 1526 "SAMPLE": TokenType.TABLE_SAMPLE, 1527 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1528 "SQL_DOUBLE": TokenType.DOUBLE, 1529 "SQL_VARCHAR": TokenType.VARCHAR, 1530 "STAGE": TokenType.STAGE, 1531 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1532 "STREAMLIT": TokenType.STREAMLIT, 1533 "TAG": TokenType.TAG, 1534 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1535 "TOP": TokenType.TOP, 1536 "WAREHOUSE": TokenType.WAREHOUSE, 1537 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float 1538 # FLOAT is a synonym for DOUBLE in Snowflake 1539 "FLOAT": TokenType.DOUBLE, 1540 } 1541 KEYWORDS.pop("/*+") 1542 1543 SINGLE_TOKENS = { 1544 **tokens.Tokenizer.SINGLE_TOKENS, 1545 "$": TokenType.PARAMETER, 1546 "!": TokenType.EXCLAMATION, 1547 } 1548 1549 VAR_SINGLE_TOKENS = {"$"} 1550 1551 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1552 1553 class Generator(generator.Generator): 1554 PARAMETER_TOKEN = "$" 1555 MATCHED_BY_SOURCE = False 1556 SINGLE_STRING_INTERVAL = True 1557 JOIN_HINTS = False 1558 TABLE_HINTS = False 1559 QUERY_HINTS = False 1560 AGGREGATE_FILTER_SUPPORTED = False 1561 SUPPORTS_TABLE_COPY = False 1562 COLLATE_IS_FUNC = True 1563 
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False
        DIRECTED_JOINS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
            exp.ArrayConcat: array_concat_sql("ARRAY_CAT"),
            exp.ArrayAppend: array_append_sql("ARRAY_APPEND"),
            exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"),
            # ARRAY_CONTAINS takes (value, array); the value is cast to VARIANT
            # unless the expression explicitly opted out.
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE")
            if e.args.get("sysdate")
            else self.function_fallback_sql(e),
            exp.Localtime: lambda self, e: self.func("CURRENT_TIME", e.this)
            if e.this
            else "CURRENT_TIME",
            exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this)
            if e.this
            else "CURRENT_TIMESTAMP",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Decrypt: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT",
                e.this,
                e.args.get("passphrase"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
            ),
            exp.DecryptRaw: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW",
                e.this,
                e.args.get("key"),
                e.args.get("iv"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
                e.args.get("aead"),
            ),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONKeys: rename_func("OBJECT_KEYS"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            # safe=True re-emits the TRY_-prefixed variants of these functions.
            exp.ParseJSON: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
            ),
            exp.ToBinary: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
            ),
            exp.ToBoolean: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
            ),
            exp.ToDouble: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
            ),
            exp.ToFile: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
            ),
            exp.ToNumber: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
                e.this,
                e.args.get("format"),
                e.args.get("precision"),
                e.args.get("scale"),
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1_BINARY"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.Skewness: rename_func("SKEW"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimeSlice: lambda self, e: self.func(
                "TIME_SLICE",
                e.this,
                e.expression,
                unit_to_str(e),
                e.args.get("kind"),
            ),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Booland: rename_func("BOOLAND"),
            exp.Boolor: rename_func("BOOLOR"),
            exp.WeekOfYear: rename_func("WEEKISO"),
            exp.YearOfWeek: rename_func("YEAROFWEEK"),
            exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
            exp.Flatten: rename_func("ARRAY_FLATTEN"),
            exp.ArrayConcatAgg: lambda self, e: self.func(
                "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
            ),
            exp.SHA2Digest: lambda self, e: self.func(
                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expressions that cannot appear inside a VALUES clause rendered as a table.
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties space-separated and unwrapped (no WITH parentheses)."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # Structured values cannot appear in a VALUES-as-table clause.
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            # TIMESTAMP_FROM_PARTS has no milliseconds argument; fold milli into nano.
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            # Geospatial casts are expressed via their conversion functions.
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            # Single-argument LOG maps to LN (natural logarithm).
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def greatest_sql(self, expression: exp.Greatest) -> str:
            name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST"
            return self.func(name, expression.this, *expression.expressions)

        def least_sql(self, expression: exp.Least) -> str:
            name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST"
            return self.func(name, expression.this, *expression.expressions)

        def generator_sql(self, expression: exp.Generator) -> str:
            """Render GENERATOR(ROWCOUNT => ..., TIMELIMIT => ...) keyword arguments."""
            args = []
            rowcount = expression.args.get("rowcount")
            time_limit = expression.args.get("time_limit")

            if rowcount:
                args.append(exp.Kwarg(this=exp.var("ROWCOUNT"), expression=rowcount))
            if time_limit:
                args.append(exp.Kwarg(this=exp.var("TIMELIMIT"), expression=time_limit))

            return self.func("GENERATOR", *args)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            # FLATTEN's fixed output column order; a user-supplied OFFSET alias
            # takes the place of the "index" column.
            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
unnest_alias.set("columns", columns) 1952 else: 1953 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1954 1955 table_input = self.sql(expression.expressions[0]) 1956 if not table_input.startswith("INPUT =>"): 1957 table_input = f"INPUT => {table_input}" 1958 1959 expression_parent = expression.parent 1960 1961 explode = ( 1962 f"FLATTEN({table_input})" 1963 if isinstance(expression_parent, exp.Lateral) 1964 else f"TABLE(FLATTEN({table_input}))" 1965 ) 1966 alias = self.sql(unnest_alias) 1967 alias = f" AS {alias}" if alias else "" 1968 value = ( 1969 "" 1970 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1971 else f"{value} FROM " 1972 ) 1973 1974 return f"{value}{explode}{alias}" 1975 1976 def show_sql(self, expression: exp.Show) -> str: 1977 terse = "TERSE " if expression.args.get("terse") else "" 1978 history = " HISTORY" if expression.args.get("history") else "" 1979 like = self.sql(expression, "like") 1980 like = f" LIKE {like}" if like else "" 1981 1982 scope = self.sql(expression, "scope") 1983 scope = f" {scope}" if scope else "" 1984 1985 scope_kind = self.sql(expression, "scope_kind") 1986 if scope_kind: 1987 scope_kind = f" IN {scope_kind}" 1988 1989 starts_with = self.sql(expression, "starts_with") 1990 if starts_with: 1991 starts_with = f" STARTS WITH {starts_with}" 1992 1993 limit = self.sql(expression, "limit") 1994 1995 from_ = self.sql(expression, "from_") 1996 if from_: 1997 from_ = f" FROM {from_}" 1998 1999 privileges = self.expressions(expression, key="privileges", flat=True) 2000 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 2001 2002 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 2003 2004 def describe_sql(self, expression: exp.Describe) -> str: 2005 # Default to table if kind is unknown 2006 kind_value = expression.args.get("kind") or "TABLE" 2007 kind = f" {kind_value}" if kind_value else "" 2008 this = f" 
{self.sql(expression, 'this')}" 2009 expressions = self.expressions(expression, flat=True) 2010 expressions = f" {expressions}" if expressions else "" 2011 return f"DESCRIBE{kind}{this}{expressions}" 2012 2013 def generatedasidentitycolumnconstraint_sql( 2014 self, expression: exp.GeneratedAsIdentityColumnConstraint 2015 ) -> str: 2016 start = expression.args.get("start") 2017 start = f" START {start}" if start else "" 2018 increment = expression.args.get("increment") 2019 increment = f" INCREMENT {increment}" if increment else "" 2020 2021 order = expression.args.get("order") 2022 if order is not None: 2023 order_clause = " ORDER" if order else " NOORDER" 2024 else: 2025 order_clause = "" 2026 2027 return f"AUTOINCREMENT{start}{increment}{order_clause}" 2028 2029 def cluster_sql(self, expression: exp.Cluster) -> str: 2030 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 2031 2032 def struct_sql(self, expression: exp.Struct) -> str: 2033 if len(expression.expressions) == 1: 2034 arg = expression.expressions[0] 2035 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 2036 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 2037 return f"{{{self.sql(expression.expressions[0])}}}" 2038 2039 keys = [] 2040 values = [] 2041 2042 for i, e in enumerate(expression.expressions): 2043 if isinstance(e, exp.PropertyEQ): 2044 keys.append( 2045 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 2046 ) 2047 values.append(e.expression) 2048 else: 2049 keys.append(exp.Literal.string(f"_{i}")) 2050 values.append(e) 2051 2052 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 2053 2054 @unsupported_args("weight", "accuracy") 2055 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 2056 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 2057 2058 def alterset_sql(self, expression: exp.AlterSet) -> str: 2059 exprs = 
self.expressions(expression, flat=True) 2060 exprs = f" {exprs}" if exprs else "" 2061 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 2062 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 2063 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 2064 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 2065 tag = self.expressions(expression, key="tag", flat=True) 2066 tag = f" TAG {tag}" if tag else "" 2067 2068 return f"SET{exprs}{file_format}{copy_options}{tag}" 2069 2070 def strtotime_sql(self, expression: exp.StrToTime): 2071 # target_type is stored as a DataType instance 2072 target_type = expression.args.get("target_type") 2073 2074 # Get the type enum from DataType instance or from type annotation 2075 if isinstance(target_type, exp.DataType): 2076 type_enum = target_type.this 2077 elif expression.type: 2078 type_enum = expression.type.this 2079 else: 2080 type_enum = exp.DataType.Type.TIMESTAMP 2081 2082 func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP") 2083 2084 return self.func( 2085 f"{'TRY_' if expression.args.get('safe') else ''}{func_name}", 2086 expression.this, 2087 self.format_time(expression), 2088 ) 2089 2090 def timestampsub_sql(self, expression: exp.TimestampSub): 2091 return self.sql( 2092 exp.TimestampAdd( 2093 this=expression.this, 2094 expression=expression.expression * -1, 2095 unit=expression.unit, 2096 ) 2097 ) 2098 2099 def jsonextract_sql(self, expression: exp.JSONExtract): 2100 this = expression.this 2101 2102 # JSON strings are valid coming from other dialects such as BQ so 2103 # for these cases we PARSE_JSON preemptively 2104 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 2105 "requires_json" 2106 ): 2107 this = exp.ParseJSON(this=this) 2108 2109 return self.func( 2110 "GET_PATH", 2111 this, 2112 expression.expression, 2113 ) 2114 2115 def timetostr_sql(self, 
expression: exp.TimeToStr) -> str: 2116 this = expression.this 2117 if this.is_string: 2118 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 2119 2120 return self.func("TO_CHAR", this, self.format_time(expression)) 2121 2122 def datesub_sql(self, expression: exp.DateSub) -> str: 2123 value = expression.expression 2124 if value: 2125 value.replace(value * (-1)) 2126 else: 2127 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 2128 2129 return date_delta_sql("DATEADD")(self, expression) 2130 2131 def select_sql(self, expression: exp.Select) -> str: 2132 limit = expression.args.get("limit") 2133 offset = expression.args.get("offset") 2134 if offset and not limit: 2135 expression.limit(exp.Null(), copy=False) 2136 return super().select_sql(expression) 2137 2138 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 2139 is_materialized = expression.find(exp.MaterializedProperty) 2140 copy_grants_property = expression.find(exp.CopyGrantsProperty) 2141 2142 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 2143 # For materialized views, COPY GRANTS is located *before* the columns list 2144 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 2145 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 2146 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 2147 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 2148 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 2149 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 2150 2151 this_name = self.sql(expression.this, "this") 2152 copy_grants = self.sql(copy_grants_property) 2153 this_schema = self.schema_columns_sql(expression.this) 2154 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 2155 2156 return 
f"{this_name}{self.sep()}{copy_grants}{this_schema}" 2157 2158 return super().createable_sql(expression, locations) 2159 2160 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 2161 this = expression.this 2162 2163 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 2164 # and add it later as part of the WITHIN GROUP clause 2165 order = this if isinstance(this, exp.Order) else None 2166 if order: 2167 expression.set("this", order.this.pop()) 2168 2169 expr_sql = super().arrayagg_sql(expression) 2170 2171 if order: 2172 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 2173 2174 return expr_sql 2175 2176 def array_sql(self, expression: exp.Array) -> str: 2177 expressions = expression.expressions 2178 2179 first_expr = seq_get(expressions, 0) 2180 if isinstance(first_expr, exp.Select): 2181 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2182 if first_expr.text("kind").upper() == "STRUCT": 2183 object_construct_args = [] 2184 for expr in first_expr.expressions: 2185 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2186 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2187 name = expr.this if isinstance(expr, exp.Alias) else expr 2188 2189 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2190 2191 array_agg = exp.ArrayAgg( 2192 this=_build_object_construct(args=object_construct_args) 2193 ) 2194 2195 first_expr.set("kind", None) 2196 first_expr.set("expressions", [array_agg]) 2197 2198 return self.sql(first_expr.subquery()) 2199 2200 return inline_array_sql(self, expression) 2201 2202 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2203 zone = self.sql(expression, "this") 2204 if not zone: 2205 return super().currentdate_sql(expression) 2206 2207 expr = exp.Cast( 2208 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2209 
to=exp.DataType(this=exp.DataType.Type.DATE), 2210 ) 2211 return self.sql(expr) 2212 2213 def dot_sql(self, expression: exp.Dot) -> str: 2214 this = expression.this 2215 2216 if not this.type: 2217 from sqlglot.optimizer.annotate_types import annotate_types 2218 2219 this = annotate_types(this, dialect=self.dialect) 2220 2221 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2222 # Generate colon notation for the top level STRUCT 2223 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2224 2225 return super().dot_sql(expression) 2226 2227 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 2228 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2229 2230 def format_sql(self, expression: exp.Format) -> str: 2231 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2232 return self.func("TO_CHAR", expression.expressions[0]) 2233 2234 return self.function_fallback_sql(expression) 2235 2236 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2237 # Set part_index to 1 if missing 2238 if not expression.args.get("delimiter"): 2239 expression.set("delimiter", exp.Literal.string(" ")) 2240 2241 if not expression.args.get("part_index"): 2242 expression.set("part_index", exp.Literal.number(1)) 2243 2244 return rename_func("SPLIT_PART")(self, expression) 2245 2246 def uniform_sql(self, expression: exp.Uniform) -> str: 2247 gen = expression.args.get("gen") 2248 seed = expression.args.get("seed") 2249 2250 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2251 if seed: 2252 gen = exp.Rand(this=seed) 2253 2254 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2255 if not gen: 2256 gen = exp.Rand() 2257 2258 return self.func("UNIFORM", expression.this, expression.expression, gen)
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Whether Array update functions return NULL when the input array is NULL.
Whether alias references are allowed in JOIN ... ON clauses.
Most dialects do not support this, but Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
For example, in Snowflake: SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id -- VALID
Reference: https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
775 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 776 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 777 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 778 return super().can_quote(identifier, identify) and not ( 779 isinstance(identifier.parent, exp.Table) 780 and not identifier.quoted 781 and identifier.name.lower() == "dual" 782 )
Checks if an identifier can be quoted
Arguments:
- identifier: The identifier to check.
- identify:
True: Always returns True except for certain cases. "safe": Only returns True if the identifier is case-insensitive. "unsafe": Only returns True if the identifier is case-sensitive.
Returns:
Whether the given text can be identified.
Mapping of an escaped sequence (e.g. \n) to its unescaped version (e.g. a newline character).
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    # Copy the inherited single-character token map and remove "$", so that
    # "$" is not tokenized as a standalone JSONPath token in this dialect.
    SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
    SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    # Treat quoted strings in PIVOT as identifiers
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    # `:` performs VARIANT extraction rather than JSONPath extraction
    COLON_IS_VARIANT_EXTRACT = True
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.EXCEPT,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_TIME: exp.Localtime,
    }

    # Snowflake function name -> builder; maps Snowflake spellings/aliases onto
    # sqlglot's canonical expression nodes.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ADD_MONTHS": lambda args: exp.AddMonths(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            preserve_end_of_month=True,
        ),
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)),
        "APPROX_TOP_K": _build_approx_top_k,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "ARRAY_FLATTEN": exp.Flatten.from_arg_list,
        "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
        "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
        "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
        "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
        "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
        "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
        "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
        "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
        "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
        "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
        "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
        "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
        "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
        "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
        "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
        "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
        "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
        "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
        "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list,
        "BOOLAND": lambda args: exp.Booland(
            this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True
        ),
        "BOOLOR": lambda args: exp.Boolor(
            this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True
        ),
        "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True),
        "BOOLXOR": lambda args: exp.Xor(
            this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True
        ),
        "CORR": lambda args: exp.Corr(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            null_on_zero_variance=True,
        ),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATEFROMPARTS": _build_date_from_parts,
        "DATE_FROM_PARTS": _build_date_from_parts,
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True),
        "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
        "DIV0": _build_if_from_div0,
        "DIV0NULL": _build_if_from_div0null,
        "EDITDISTANCE": lambda args: exp.Levenshtein(
            this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
        ),
        "FLATTEN": exp.Explode.from_arg_list,
        "GENERATOR": _build_generator,
        "GET": exp.GetExtract.from_arg_list,
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0),
            expression=dialect.to_json_path(seq_get(args, 1)),
            requires_json=True,
        ),
        "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest(
            this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
        ),
        "LEAST_IGNORE_NULLS": lambda args: exp.Least(
            this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
        ),
        "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
        "IFF": exp.If.from_arg_list,
        "MD5_HEX": exp.MD5.from_arg_list,
        "MD5_BINARY": exp.MD5Digest.from_arg_list,
        "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
        "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
        "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True),
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "OBJECT_KEYS": exp.JSONKeys.from_arg_list,
        "OCTET_LENGTH": exp.ByteLength.from_arg_list,
        "PARSE_URL": lambda args: exp.ParseUrl(
            this=seq_get(args, 0), permissive=seq_get(args, 1)
        ),
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REPLACE": build_replace_with_optional_replacement,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "ROUND": _build_round,
        "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
        "SHA1_HEX": exp.SHA.from_arg_list,
        "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
        "SHA2_HEX": exp.SHA2.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "STDDEV_SAMP": exp.Stddev.from_arg_list,
        "STRTOK": _build_strtok,
        "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True),
        "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIME_FROM_PARTS": lambda args: exp.TimeFromParts(
            hour=seq_get(args, 0),
            min=seq_get(args, 1),
            sec=seq_get(args, 2),
            nano=seq_get(args, 3),
            overflow=True,
        ),
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts,
        "TRY_DECRYPT": lambda args: exp.Decrypt(
            this=seq_get(args, 0),
            passphrase=seq_get(args, 1),
            aad=seq_get(args, 2),
            encryption_method=seq_get(args, 3),
            safe=True,
        ),
        "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw(
            this=seq_get(args, 0),
            key=seq_get(args, 1),
            iv=seq_get(args, 2),
            aad=seq_get(args, 3),
            encryption_method=seq_get(args, 4),
            aead=seq_get(args, 5),
            safe=True,
        ),
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_BINARY": lambda args: exp.ToBinary(
            this=seq_get(args, 0), format=seq_get(args, 1), safe=True
        ),
        "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        **dict.fromkeys(
            ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number
        ),
        "TRY_TO_DOUBLE": lambda args: exp.ToDouble(
            this=seq_get(args, 0), format=seq_get(args, 1), safe=True
        ),
        "TRY_TO_FILE": lambda args: exp.ToFile(
            this=seq_get(args, 0), path=seq_get(args, 1), safe=True
        ),
        "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
        "TRY_TO_TIMESTAMP": _build_datetime(
            "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
        ),
        "TRY_TO_TIMESTAMP_LTZ": _build_datetime(
            "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True
        ),
        "TRY_TO_TIMESTAMP_NTZ": _build_datetime(
            "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True
        ),
        "TRY_TO_TIMESTAMP_TZ": _build_datetime(
            "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True
        ),
        "TO_CHAR": build_timetostr_or_tochar,
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        **dict.fromkeys(
            ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"),
            lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": build_timetostr_or_tochar,
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list,
        "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list,
        "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list,
        "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
        "LIKE": build_like(exp.Like),
        "ILIKE": build_like(exp.ILike),
        "SEARCH": _build_search,
        "SKEW": exp.Skewness.from_arg_list,
        "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
        "WEEKISO": exp.WeekOfYear.from_arg_list,
        "WEEKOFYEAR": exp.Week.from_arg_list,
    }
    # PREDICT is not a Snowflake function, so drop the inherited entry
    FUNCTIONS.pop("PREDICT")

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "DIRECTORY": lambda self: self._parse_directory(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        "LISTAGG": lambda self: self._parse_string_agg(),
        "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "SESSION": lambda self: self._parse_alter_session(),
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.GET: lambda self: self._parse_get(),
        TokenType.PUT: lambda self: self._parse_put(),
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "CREDENTIALS": lambda self: self._parse_credentials_property(),
        "FILE_FORMAT": lambda self: self._parse_file_format_property(),
        "LOCATION": lambda self: self._parse_location_property(),
        "TAG": lambda self: self._parse_tag(),
        "USING": lambda self: self._match_text_seq("TEMPLATE")
        and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "DATABASES": _show_parser("DATABASES"),
        "TERSE DATABASES": _show_parser("DATABASES"),
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "STAGES": _show_parser("STAGES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
        "FILE FORMATS": _show_parser("FILE FORMATS"),
        "FUNCTIONS": _show_parser("FUNCTIONS"),
        "PROCEDURES": _show_parser("PROCEDURES"),
        "WAREHOUSES": _show_parser("WAREHOUSES"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    # Column layout produced by Snowflake's FLATTEN table function
    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            # Strip casts from lambda parameters; only the bare names are kept
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    COLUMN_OPERATORS = {
        **parser.Parser.COLUMN_OPERATORS,
        # `!` accesses a model attribute, e.g. model!attribute
        TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
            exp.ModelAttribute, this=this, expression=attr
        ),
    }

    def _parse_directory(self) -> exp.DirectoryStage:
        """Parse DIRECTORY(<stage>) into a DirectoryStage node."""
        table = self._parse_table_parts()

        if isinstance(table, exp.Table):
            table = table.this

        return self.expression(exp.DirectoryStage, this=table)

    def _parse_use(self) -> exp.Use:
        # USE SECONDARY ROLES { ALL | NONE | <role> [, ...] }
        if self._match_text_seq("SECONDARY", "ROLES"):
            this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
            roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
            return self.expression(
                exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
            )

        return super()._parse_use()

    def _negate_range(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        if not this:
            return this

        query = this.args.get("query")
        if isinstance(this, exp.In) and isinstance(query, exp.Query):
            # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
            # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
            # which can produce different results (most likely a Snowflake bug).
            #
            # https://docs.snowflake.com/en/sql-reference/functions/in
            # Context: https://github.com/tobymao/sqlglot/issues/3890
            return self.expression(
                exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
            )

        return self.expression(exp.Not, this=this)

    def _parse_tag(self) -> exp.Tags:
        return self.expression(
            exp.Tags,
            expressions=self._parse_wrapped_csv(self._parse_property),
        )

    def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
        """Parse [WITH] MASKING POLICY / PROJECTION POLICY / TAG column constraints."""
        if self._prev.token_type != TokenType.WITH:
            # WITH is optional; step back so the keyword just consumed is re-examined
            self._retreat(self._index - 1)

        if self._match_text_seq("MASKING", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.MaskingPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                expressions=self._match(TokenType.USING)
                and self._parse_wrapped_csv(self._parse_id_var),
            )
        if self._match_text_seq("PROJECTION", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.ProjectionPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            )
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return None

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return super()._parse_with_property()

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr)
        expression = (
            self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise()
        )
        return self.expression(
            exp.Extract, this=map_date_part(this, self.dialect), expression=expression
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string()) or self._parse_assignment()

        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, defaulting FLATTEN output columns when no explicit alias columns exist."""
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            # Optionally parse (FILE_FORMAT => ..., PATTERN => ...) after a staged file
            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        # NOTE(review): consume_pipe is accepted but not forwarded to
        # super()._parse_table — TODO confirm this is intentional.
        table = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
            parse_partition=parse_partition,
        )
        if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
            # Hoist TABLE(...) out of the wrapping Table node, carrying over its args
            table_from_rows = table.this
            for arg in exp.TableFromRows.arg_types:
                if arg != "this":
                    table_from_rows.set(arg, table.args.get(arg))

            table = table_from_rows

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Support the IDENTIFIER(<name or string>) wrapper syntax
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return
self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1336 1337 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1338 1339 def _parse_show_snowflake(self, this: str) -> exp.Show: 1340 scope = None 1341 scope_kind = None 1342 1343 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1344 # which is syntactically valid but has no effect on the output 1345 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1346 1347 history = self._match_text_seq("HISTORY") 1348 1349 like = self._parse_string() if self._match(TokenType.LIKE) else None 1350 1351 if self._match(TokenType.IN): 1352 if self._match_text_seq("ACCOUNT"): 1353 scope_kind = "ACCOUNT" 1354 elif self._match_text_seq("CLASS"): 1355 scope_kind = "CLASS" 1356 scope = self._parse_table_parts() 1357 elif self._match_text_seq("APPLICATION"): 1358 scope_kind = "APPLICATION" 1359 if self._match_text_seq("PACKAGE"): 1360 scope_kind += " PACKAGE" 1361 scope = self._parse_table_parts() 1362 elif self._match_set(self.DB_CREATABLES): 1363 scope_kind = self._prev.text.upper() 1364 if self._curr: 1365 scope = self._parse_table_parts() 1366 elif self._curr: 1367 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1368 scope = self._parse_table_parts() 1369 1370 return self.expression( 1371 exp.Show, 1372 terse=terse, 1373 this=this, 1374 history=history, 1375 like=like, 1376 scope=scope, 1377 scope_kind=scope_kind, 1378 starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1379 limit=self._parse_limit(), 1380 from_=self._parse_string() if self._match(TokenType.FROM) else None, 1381 privileges=self._match_text_seq("WITH", "PRIVILEGES") 1382 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1383 ) 1384 1385 def _parse_put(self) -> exp.Put | exp.Command: 1386 if self._curr.token_type != TokenType.STRING: 1387 return self._parse_as_command(self._prev) 1388 1389 return self.expression( 1390 exp.Put, 1391 
this=self._parse_string(), 1392 target=self._parse_location_path(), 1393 properties=self._parse_properties(), 1394 ) 1395 1396 def _parse_get(self) -> t.Optional[exp.Expression]: 1397 start = self._prev 1398 1399 # If we detect GET( then we need to parse a function, not a statement 1400 if self._match(TokenType.L_PAREN): 1401 self._retreat(self._index - 2) 1402 return self._parse_expression() 1403 1404 target = self._parse_location_path() 1405 1406 # Parse as command if unquoted file path 1407 if self._curr.token_type == TokenType.URI_START: 1408 return self._parse_as_command(start) 1409 1410 return self.expression( 1411 exp.Get, 1412 this=self._parse_string(), 1413 target=target, 1414 properties=self._parse_properties(), 1415 ) 1416 1417 def _parse_location_property(self) -> exp.LocationProperty: 1418 self._match(TokenType.EQ) 1419 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1420 1421 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1422 # Parse either a subquery or a staged file 1423 return ( 1424 self._parse_select(table=True, parse_subquery_alias=False) 1425 if self._match(TokenType.L_PAREN, advance=False) 1426 else self._parse_table_parts() 1427 ) 1428 1429 def _parse_location_path(self) -> exp.Var: 1430 start = self._curr 1431 self._advance_any(ignore_reserved=True) 1432 1433 # We avoid consuming a comma token because external tables like @foo and @bar 1434 # can be joined in a query with a comma separator, as well as closing paren 1435 # in case of subqueries 1436 while self._is_connected() and not self._match_set( 1437 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1438 ): 1439 self._advance_any(ignore_reserved=True) 1440 1441 return exp.var(self._find_sql(start, self._prev)) 1442 1443 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1444 this = super()._parse_lambda_arg() 1445 1446 if not this: 1447 return this 1448 1449 typ = self._parse_types() 1450 1451 if typ: 1452 return 
self.expression(exp.Cast, this=this, to=typ) 1453 1454 return this 1455 1456 def _parse_foreign_key(self) -> exp.ForeignKey: 1457 # inlineFK, the REFERENCES columns are implied 1458 if self._match(TokenType.REFERENCES, advance=False): 1459 return self.expression(exp.ForeignKey) 1460 1461 # outoflineFK, explicitly names the columns 1462 return super()._parse_foreign_key() 1463 1464 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1465 self._match(TokenType.EQ) 1466 if self._match(TokenType.L_PAREN, advance=False): 1467 expressions = self._parse_wrapped_options() 1468 else: 1469 expressions = [self._parse_format_name()] 1470 1471 return self.expression( 1472 exp.FileFormatProperty, 1473 expressions=expressions, 1474 ) 1475 1476 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1477 return self.expression( 1478 exp.CredentialsProperty, 1479 expressions=self._parse_wrapped_options(), 1480 ) 1481 1482 def _parse_semantic_view(self) -> exp.SemanticView: 1483 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1484 1485 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1486 if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")): 1487 keyword = self._prev.text.lower() 1488 kwargs[keyword] = self._parse_csv(self._parse_disjunction) 1489 elif self._match_text_seq("WHERE"): 1490 kwargs["where"] = self._parse_expression() 1491 else: 1492 self.raise_error("Expecting ) or encountered unexpected keyword") 1493 break 1494 1495 return self.expression(exp.SemanticView, **kwargs) 1496 1497 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 1498 set = super()._parse_set(unset=unset, tag=tag) 1499 1500 if isinstance(set, exp.Set): 1501 for expr in set.expressions: 1502 if isinstance(expr, exp.SetItem): 1503 expr.set("kind", "VARIABLE") 1504 return set
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
    class Tokenizer(tokens.Tokenizer):
        # A quote inside a string may be escaped by a backslash or by doubling it
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # $$ ... $$ dollar-quoted strings are raw (no escape processing)
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        # Snowflake has no /*+ ... */ optimizer hints
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        # $ may also begin an identifier-like variable name
        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed structurally (see _parse_show_snowflake), not as a raw command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False
        DIRECTED_JOINS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
            exp.ArrayConcat: array_concat_sql("ARRAY_CAT"),
            exp.ArrayAppend: array_append_sql("ARRAY_APPEND"),
            exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"),
            # ARRAY_CONTAINS expects a VARIANT needle; cast unless explicitly disabled
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE")
            if e.args.get("sysdate")
            else self.function_fallback_sql(e),
            exp.Localtime: lambda self, e: self.func("CURRENT_TIME", e.this)
            if e.this
            else "CURRENT_TIME",
            exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this)
            if e.this
            else "CURRENT_TIMESTAMP",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Decrypt: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT",
                e.this,
                e.args.get("passphrase"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
            ),
            exp.DecryptRaw: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW",
                e.this,
                e.args.get("key"),
                e.args.get("iv"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
                e.args.get("aead"),
            ),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                # ARRAY_GENERATE_RANGE's end bound is exclusive, hence the +1
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONKeys: rename_func("OBJECT_KEYS"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
            ),
            exp.ToBinary: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
            ),
            exp.ToBoolean: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
            ),
            exp.ToDouble: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
            ),
            exp.ToFile: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
            ),
            exp.ToNumber: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
                e.this,
                e.args.get("format"),
                e.args.get("precision"),
                e.args.get("scale"),
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1_BINARY"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.Skewness: rename_func("SKEW"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimeSlice: lambda self, e: self.func(
                "TIME_SLICE",
                e.this,
                e.expression,
                unit_to_str(e),
                e.args.get("kind"),
            ),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            # TIMESTAMPDIFF's argument order is (unit, start, end), hence the swap
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Booland: rename_func("BOOLAND"),
            exp.Boolor: rename_func("BOOLOR"),
            exp.WeekOfYear: rename_func("WEEKISO"),
            exp.YearOfWeek: rename_func("YEAROFWEEK"),
            exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
            exp.Flatten: rename_func("ARRAY_FLATTEN"),
            exp.ArrayConcatAgg: lambda self, e: self.func(
                "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
            ),
            # SHA2_BINARY defaults to a 256-bit digest when no length is given
            exp.SHA2Digest: lambda self, e: self.func(
                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expressions that cannot appear inside a VALUES clause (see values_sql)
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties unwrapped and space-separated (Snowflake style)."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            """Fall back to a SELECT ... UNION rendering when VALUES holds unsupported nodes."""
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            # TIMESTAMP_FROM_PARTS takes nanoseconds, not milliseconds; convert
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            # Geospatial casts must be expressed through conversion functions
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            # Single-argument LOG is the natural logarithm in this dialect's source
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def greatest_sql(self, expression: exp.Greatest) -> str:
            name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST"
            return self.func(name, expression.this, *expression.expressions)

        def least_sql(self, expression: exp.Least) -> str:
            name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST"
            return self.func(name, expression.this, *expression.expressions)

        def generator_sql(self, expression: exp.Generator) -> str:
            """Render GENERATOR(ROWCOUNT => ..., TIMELIMIT => ...)."""
            args = []
            rowcount = expression.args.get("rowcount")
            time_limit = expression.args.get("time_limit")

            if rowcount:
                args.append(exp.Kwarg(this=exp.var("ROWCOUNT"), expression=rowcount))
            if time_limit:
                args.append(exp.Kwarg(this=exp.var("TIMELIMIT"), expression=time_limit))

            return self.func("GENERATOR", *args)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as Snowflake's (TABLE(...)) FLATTEN, wiring up the
            function's fixed output columns (SEQ, KEY, PATH, INDEX, VALUE, THIS)."""
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                # An explicit OFFSET alias maps onto FLATTEN's INDEX column
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            # LATERAL already implies a table context, so TABLE(...) is omitted there
            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render a SHOW statement with all of its optional clauses."""
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from_")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""

            # Tri-state: True -> ORDER, False -> NOORDER, None -> omit the clause
            order = expression.args.get("order")
            if order is not None:
                order_clause = " ORDER" if order else " NOORDER"
            else:
                order_clause = ""

            return f"AUTOINCREMENT{start}{increment}{order_clause}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a Struct as OBJECT_CONSTRUCT(key1, value1, key2, value2, ...)."""
            if len(expression.expressions) == 1:
                arg = expression.expressions[0]
                if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
                    # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
                    return f"{{{self.sql(expression.expressions[0])}}}"

            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    # Unnamed entries get positional keys: '_0', '_1', ...
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render ALTER ... SET with stage file format/copy options and tags."""
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            # target_type is stored as a DataType instance
            target_type = expression.args.get("target_type")

            # Get the type enum from DataType instance or from type annotation
            if isinstance(target_type, exp.DataType):
                type_enum = target_type.this
            elif expression.type:
                type_enum = expression.type.this
            else:
                type_enum = exp.DataType.Type.TIMESTAMP

            func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")

            return self.func(
                f"{'TRY_' if expression.args.get('safe') else ''}{func_name}",
                expression.this,
                self.format_time(expression),
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            # Rewritten as TIMESTAMPADD with a negated amount
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other
dialects such as BQ so 2103 # for these cases we PARSE_JSON preemptively 2104 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 2105 "requires_json" 2106 ): 2107 this = exp.ParseJSON(this=this) 2108 2109 return self.func( 2110 "GET_PATH", 2111 this, 2112 expression.expression, 2113 ) 2114 2115 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 2116 this = expression.this 2117 if this.is_string: 2118 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 2119 2120 return self.func("TO_CHAR", this, self.format_time(expression)) 2121 2122 def datesub_sql(self, expression: exp.DateSub) -> str: 2123 value = expression.expression 2124 if value: 2125 value.replace(value * (-1)) 2126 else: 2127 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 2128 2129 return date_delta_sql("DATEADD")(self, expression) 2130 2131 def select_sql(self, expression: exp.Select) -> str: 2132 limit = expression.args.get("limit") 2133 offset = expression.args.get("offset") 2134 if offset and not limit: 2135 expression.limit(exp.Null(), copy=False) 2136 return super().select_sql(expression) 2137 2138 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 2139 is_materialized = expression.find(exp.MaterializedProperty) 2140 copy_grants_property = expression.find(exp.CopyGrantsProperty) 2141 2142 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 2143 # For materialized views, COPY GRANTS is located *before* the columns list 2144 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 2145 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 2146 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 2147 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 2148 post_schema_properties = 
locations[exp.Properties.Location.POST_SCHEMA] 2149 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 2150 2151 this_name = self.sql(expression.this, "this") 2152 copy_grants = self.sql(copy_grants_property) 2153 this_schema = self.schema_columns_sql(expression.this) 2154 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 2155 2156 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 2157 2158 return super().createable_sql(expression, locations) 2159 2160 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 2161 this = expression.this 2162 2163 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 2164 # and add it later as part of the WITHIN GROUP clause 2165 order = this if isinstance(this, exp.Order) else None 2166 if order: 2167 expression.set("this", order.this.pop()) 2168 2169 expr_sql = super().arrayagg_sql(expression) 2170 2171 if order: 2172 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 2173 2174 return expr_sql 2175 2176 def array_sql(self, expression: exp.Array) -> str: 2177 expressions = expression.expressions 2178 2179 first_expr = seq_get(expressions, 0) 2180 if isinstance(first_expr, exp.Select): 2181 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2182 if first_expr.text("kind").upper() == "STRUCT": 2183 object_construct_args = [] 2184 for expr in first_expr.expressions: 2185 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2186 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2187 name = expr.this if isinstance(expr, exp.Alias) else expr 2188 2189 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2190 2191 array_agg = exp.ArrayAgg( 2192 this=_build_object_construct(args=object_construct_args) 2193 ) 2194 2195 first_expr.set("kind", None) 2196 first_expr.set("expressions", [array_agg]) 2197 2198 return self.sql(first_expr.subquery()) 
2199 2200 return inline_array_sql(self, expression) 2201 2202 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2203 zone = self.sql(expression, "this") 2204 if not zone: 2205 return super().currentdate_sql(expression) 2206 2207 expr = exp.Cast( 2208 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2209 to=exp.DataType(this=exp.DataType.Type.DATE), 2210 ) 2211 return self.sql(expr) 2212 2213 def dot_sql(self, expression: exp.Dot) -> str: 2214 this = expression.this 2215 2216 if not this.type: 2217 from sqlglot.optimizer.annotate_types import annotate_types 2218 2219 this = annotate_types(this, dialect=self.dialect) 2220 2221 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2222 # Generate colon notation for the top level STRUCT 2223 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2224 2225 return super().dot_sql(expression) 2226 2227 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 2228 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2229 2230 def format_sql(self, expression: exp.Format) -> str: 2231 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2232 return self.func("TO_CHAR", expression.expressions[0]) 2233 2234 return self.function_fallback_sql(expression) 2235 2236 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2237 # Set part_index to 1 if missing 2238 if not expression.args.get("delimiter"): 2239 expression.set("delimiter", exp.Literal.string(" ")) 2240 2241 if not expression.args.get("part_index"): 2242 expression.set("part_index", exp.Literal.number(1)) 2243 2244 return rename_func("SPLIT_PART")(self, expression) 2245 2246 def uniform_sql(self, expression: exp.Uniform) -> str: 2247 gen = expression.args.get("gen") 2248 seed = expression.args.get("seed") 2249 2250 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2251 if seed: 2252 gen = exp.Rand(this=seed) 2253 2254 # 
No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2255 if not gen: 2256 gen = exp.Rand() 2257 2258 return self.func("UNIFORM", expression.this, expression.expression, gen)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render data types, special-casing VECTOR elements and STRUCT fields."""
    # VECTOR only accepts FLOAT (not DOUBLE), INT and STRING element types, so
    # keep FLOAT instead of normalizing it to its synonym DOUBLE
    # https://docs.snowflake.com/en/sql-reference/data-types-vector
    if expression.is_type(exp.DataType.Type.DOUBLE):
        enclosing = expression.parent
        if isinstance(enclosing, exp.DataType) and enclosing.is_type(exp.DataType.Type.VECTOR):
            return "FLOAT"

    fields = expression.expressions
    if fields and expression.is_type(*exp.DataType.STRUCT_TYPES):
        for field in fields:
            # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ],
            # so a bare value type forces the fallback to plain OBJECT
            if isinstance(field, exp.DataType):
                return "OBJECT"
            if isinstance(field, exp.ColumnDef) and field.this and field.this.is_string:
                # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides
                # converting 'foo' into an identifier, we also need to quote it because these
                # keys are case-sensitive. For example:
                #
                # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                field.this.replace(exp.to_identifier(field.name, quoted=True))

    return super().datatype_sql(expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS, folding a `milli` argument into `nano`."""
    millis = expression.args.get("milli")
    if millis is not None:
        # Snowflake has no milliseconds slot, so scale them up to nanoseconds
        expression.set("nano", millis.pop() * exp.Literal.number(1000000))

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Casts to geospatial types become TO_GEOGRAPHY / TO_GEOMETRY calls."""
    types = exp.DataType.Type
    if expression.is_type(types.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(types.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Emit TRY_CAST only when the operand is provably a string; else plain CAST."""
    operand = expression.this

    if operand.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        operand = annotate_types(operand, dialect=self.dialect)

    # Snowflake's TRY_CAST requires a string operand. Keep TRY_CAST when
    # roundtripping (only Snowflake sets "requires_string") or when the operand
    # is known to be textual; otherwise fall back to a regular CAST.
    is_text = operand.is_type(*exp.DataType.TEXT_TYPES)
    if not (expression.args.get("requires_string") or is_text):
        return self.cast_sql(expression)

    return super().trycast_sql(expression)
def generator_sql(self, expression: exp.Generator) -> str:
    """Render GENERATOR(ROWCOUNT => ..., TIMELIMIT => ...) keyword arguments."""
    kwargs = [
        exp.Kwarg(this=exp.var(keyword), expression=arg)
        for keyword, arg in (
            ("ROWCOUNT", expression.args.get("rowcount")),
            ("TIMELIMIT", expression.args.get("time_limit")),
        )
        if arg
    ]
    return self.func("GENERATOR", *kwargs)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Transpile UNNEST into Snowflake's [TABLE(...)] FLATTEN construct.

    FLATTEN exposes six output columns (SEQ, KEY, PATH, INDEX, VALUE, THIS),
    so the alias column list is padded out accordingly.
    """
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    unnest_alias_columns = unnest_alias.columns if unnest_alias else []
    # The first user-provided alias column maps onto FLATTEN's VALUE column
    value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        # A WITH OFFSET alias (if any) maps onto FLATTEN's INDEX column
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        value,
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    expression_parent = expression.parent

    # Inside a LATERAL the TABLE() wrapper is implied, so it is omitted
    explode = (
        f"FLATTEN({table_input})"
        if isinstance(expression_parent, exp.Lateral)
        else f"TABLE(FLATTEN({table_input}))"
    )
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    # Outside FROM/JOIN/LATERAL, select the value column from the flattened table
    value = (
        ""
        if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
        else f"{value} FROM "
    )

    return f"{value}{explode}{alias}"
def show_sql(self, expression: exp.Show) -> str:
    """Assemble a Snowflake SHOW command from its optional clauses."""
    parts = ["SHOW "]
    if expression.args.get("terse"):
        parts.append("TERSE ")
    parts.append(expression.name)
    if expression.args.get("history"):
        parts.append(" HISTORY")

    like = self.sql(expression, "like")
    if like:
        parts.append(f" LIKE {like}")

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        parts.append(f" IN {scope_kind}")

    scope = self.sql(expression, "scope")
    if scope:
        parts.append(f" {scope}")

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        parts.append(f" STARTS WITH {starts_with}")

    # The limit clause carries its own leading whitespace when present
    parts.append(self.sql(expression, "limit"))

    from_ = self.sql(expression, "from_")
    if from_:
        parts.append(f" FROM {from_}")

    privileges = self.expressions(expression, key="privileges", flat=True)
    if privileges:
        parts.append(f" WITH PRIVILEGES {privileges}")

    return "".join(parts)
def describe_sql(self, expression: exp.Describe) -> str:
    """Render DESCRIBE, defaulting the object kind to TABLE when unspecified."""
    # `kind` is always non-empty thanks to the "TABLE" fallback, so the former
    # `if kind_value` conditional spacing was dead code and has been removed.
    kind = expression.args.get("kind") or "TABLE"
    this = self.sql(expression, "this")
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE {kind} {this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render identity columns as AUTOINCREMENT [START n] [INCREMENT n] [[NO]ORDER]."""
    pieces = ["AUTOINCREMENT"]

    start = expression.args.get("start")
    if start:
        pieces.append(f" START {start}")

    increment = expression.args.get("increment")
    if increment:
        pieces.append(f" INCREMENT {increment}")

    # Tri-state: True -> ORDER, False -> NOORDER, None -> omitted entirely
    order = expression.args.get("order")
    if order is not None:
        pieces.append(" ORDER" if order else " NOORDER")

    return "".join(pieces)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render STRUCT literals as OBJECT_CONSTRUCT (or the wildcard {...} form)."""
    exprs = expression.expressions

    if len(exprs) == 1:
        only = exprs[0]
        # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
        if only.is_star or (isinstance(only, exp.ILike) and only.left.is_star):
            return f"{{{self.sql(only)}}}"

    pairs = []
    for i, e in enumerate(exprs):
        if isinstance(e, exp.PropertyEQ):
            key = exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            pairs.append((key, e.expression))
        else:
            # Positional entries get synthetic keys _0, _1, ...
            pairs.append((exp.Literal.string(f"_{i}"), e))

    return self.func("OBJECT_CONSTRUCT", *flatten(pairs))
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render ALTER ... SET with optional stage file-format/copy-option/tag clauses."""
    segments = ["SET"]

    exprs = self.expressions(expression, flat=True)
    if exprs:
        segments.append(f" {exprs}")

    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    if file_format:
        segments.append(f" STAGE_FILE_FORMAT = ({file_format})")

    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    if copy_options:
        segments.append(f" STAGE_COPY_OPTIONS = ({copy_options})")

    tag = self.expressions(expression, key="tag", flat=True)
    if tag:
        segments.append(f" TAG {tag}")

    return "".join(segments)
def strtotime_sql(self, expression: exp.StrToTime):
    """Pick the TO_TIMESTAMP* variant that matches the target timestamp type."""
    target_type = expression.args.get("target_type")

    # Prefer the explicit target DataType, then the annotated type, then
    # default to a plain TIMESTAMP
    if isinstance(target_type, exp.DataType):
        type_enum = target_type.this
    else:
        type_enum = expression.type.this if expression.type else exp.DataType.Type.TIMESTAMP

    name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")
    if expression.args.get("safe"):
        name = f"TRY_{name}"

    return self.func(name, expression.this, self.format_time(expression))
def jsonextract_sql(self, expression: exp.JSONExtract):
    """Render JSON extraction as GET_PATH, parsing raw JSON strings first."""
    this = expression.this

    # Strings holding JSON are valid in other dialects (e.g. BigQuery), so
    # wrap them in PARSE_JSON preemptively unless the value is already JSON
    already_json = isinstance(this, (exp.ParseJSON, exp.JSONExtract))
    if not (already_json or expression.args.get("requires_json")):
        this = exp.ParseJSON(this=this)

    return self.func("GET_PATH", this, expression.expression)
def datesub_sql(self, expression: exp.DateSub) -> str:
    """Transpile DATE_SUB to DATEADD by negating the subtracted amount."""
    amount = expression.expression
    if not amount:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")
    else:
        # Negate in place so the shared DATEADD generator can be reused
        amount.replace(amount * (-1))

    return date_delta_sql("DATEADD")(self, expression)
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Reorder COPY GRANTS for CREATE MATERIALIZED VIEW statements.

    Mutates `locations` in place by removing the CopyGrantsProperty from the
    POST_SCHEMA bucket so it is not rendered twice.
    """
    is_materialized = expression.find(exp.MaterializedProperty)
    copy_grants_property = expression.find(exp.CopyGrantsProperty)

    if expression.kind == "VIEW" and is_materialized and copy_grants_property:
        # For materialized views, COPY GRANTS is located *before* the columns list
        # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
        # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
        post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
        post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

        this_name = self.sql(expression.this, "this")
        copy_grants = self.sql(copy_grants_property)
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

    return super().createable_sql(expression, locations)
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    """Move an ORDER BY out of ARRAY_AGG into a trailing WITHIN GROUP clause."""
    order = expression.this if isinstance(expression.this, exp.Order) else None
    if order:
        # Detach the ordered operand so the base generator sees a plain argument
        expression.set("this", order.this.pop())

    result = super().arrayagg_sql(expression)

    if order:
        result = self.sql(exp.WithinGroup(this=result, expression=order))

    return result
def array_sql(self, expression: exp.Array) -> str:
    """Render ARRAY literals; ARRAY(SELECT AS STRUCT ...) becomes ARRAY_AGG."""
    expressions = expression.expressions

    first_expr = seq_get(expressions, 0)
    if isinstance(first_expr, exp.Select):
        # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
        if first_expr.text("kind").upper() == "STRUCT":
            object_construct_args = []
            for expr in first_expr.expressions:
                # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                name = expr.this if isinstance(expr, exp.Alias) else expr

                object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

            array_agg = exp.ArrayAgg(
                this=_build_object_construct(args=object_construct_args)
            )

            # Rewrite the subquery in place to aggregate the constructed objects
            first_expr.set("kind", None)
            first_expr.set("expressions", [array_agg])

            return self.sql(first_expr.subquery())

    return inline_array_sql(self, expression)
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE, converting through a timezone when one is given."""
    timezone = self.sql(expression, "this")
    if not timezone:
        return super().currentdate_sql(expression)

    # CURRENT_DATE('tz') has no direct Snowflake form; emulate it by converting
    # CURRENT_TIMESTAMP to the target zone and casting the result to DATE
    converted = exp.ConvertTimezone(target_tz=timezone, timestamp=exp.CurrentTimestamp())
    return self.sql(
        exp.Cast(this=converted, to=exp.DataType(this=exp.DataType.Type.DATE))
    )
def dot_sql(self, expression: exp.Dot) -> str:
    """Use colon notation when dotting into a top-level STRUCT value."""
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this, dialect=self.dialect)

    # Only the outermost access uses ':'; nested Dot parts keep dot syntax
    if this.is_type(exp.DataType.Type.STRUCT) and not isinstance(this, exp.Dot):
        return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

    return super().dot_sql(expression)
def splitpart_sql(self, expression: exp.SplitPart) -> str:
    """Render SPLIT_PART, supplying Snowflake's documented argument defaults."""
    # Default delimiter is a single space; default part index is 1
    defaults = (
        ("delimiter", exp.Literal.string(" ")),
        ("part_index", exp.Literal.number(1)),
    )
    for key, default in defaults:
        if not expression.args.get(key):
            expression.set(key, default)

    return rename_func("SPLIT_PART")(self, expression)
def uniform_sql(self, expression: exp.Uniform) -> str:
    """Render UNIFORM(min, max, gen), synthesizing the generator if needed."""
    seed = expression.args.get("seed")

    if seed:
        # Databricks UNIFORM(min, max, seed) -> wrap the seed in RANDOM(seed)
        gen = exp.Rand(this=seed)
    else:
        # Use the explicit generator, or RANDOM() for the Databricks 2-arg form
        gen = expression.args.get("gen") or exp.Rand()

    return self.func("UNIFORM", expression.this, expression.expression, gen)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- INOUT_SEPARATOR
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- UPDATE_STATEMENT_SUPPORTS_FROM
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql