sqlglot.dialects.snowflake
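A minimal, illustrative sketch of how this dialect is typically exercised through sqlglot's public parse_one/transpile API (this snippet is not part of the module source below, and the exact SQL text produced may vary between sqlglot versions):

    import sqlglot
    from sqlglot import exp, parse_one

    # DIV0(a, b) is normalized at parse time into an IF/IFF-style expression
    # (see _build_if_from_div0 below).
    node = parse_one("SELECT DIV0(a, b)", read="snowflake").selects[0]
    assert isinstance(node, exp.If)

    # Round-trip a Snowflake-specific function back to Snowflake SQL.
    print(sqlglot.transpile("SELECT IFF(x > 0, 'pos', 'non-pos')", read="snowflake", write="snowflake")[0])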
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, jsonpath, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 array_append_sql, 10 array_concat_sql, 11 binary_from_function, 12 build_default_decimal_type, 13 build_formatted_time, 14 build_like, 15 build_replace_with_optional_replacement, 16 build_timetostr_or_tochar, 17 build_trunc, 18 date_delta_sql, 19 date_trunc_to_time, 20 datestrtodate_sql, 21 groupconcat_sql, 22 if_sql, 23 inline_array_sql, 24 map_date_part, 25 max_or_greatest, 26 min_or_least, 27 no_make_interval_sql, 28 no_timestamp_sql, 29 rename_func, 30 strposition_sql, 31 timestampdiff_sql, 32 timestamptrunc_sql, 33 timestrtotime_sql, 34 unit_to_str, 35 var_map_sql, 36) 37from sqlglot.generator import unsupported_args 38from sqlglot.helper import find_new_name, flatten, is_date_unit, is_int, seq_get 39from sqlglot.optimizer.scope import build_scope, find_all_in_scope 40from sqlglot.tokens import TokenType 41from sqlglot.typing.snowflake import EXPRESSION_METADATA 42 43if t.TYPE_CHECKING: 44 from sqlglot._typing import E, B 45 46 47# Timestamp types used in _build_datetime 48TIMESTAMP_TYPES = { 49 exp.DataType.Type.TIMESTAMP: "TO_TIMESTAMP", 50 exp.DataType.Type.TIMESTAMPLTZ: "TO_TIMESTAMP_LTZ", 51 exp.DataType.Type.TIMESTAMPNTZ: "TO_TIMESTAMP_NTZ", 52 exp.DataType.Type.TIMESTAMPTZ: "TO_TIMESTAMP_TZ", 53} 54 55 56def _build_strtok(args: t.List) -> exp.SplitPart: 57 # Add default delimiter (space) if missing - per Snowflake docs 58 if len(args) == 1: 59 args.append(exp.Literal.string(" ")) 60 61 # Add default part_index (1) if missing 62 if len(args) == 2: 63 args.append(exp.Literal.number(1)) 64 65 return exp.SplitPart.from_arg_list(args) 66 67 68def _build_approx_top_k(args: t.List) -> exp.ApproxTopK: 69 """ 70 Normalizes APPROX_TOP_K arguments to match Snowflake semantics. 
71 72 Snowflake APPROX_TOP_K signature: APPROX_TOP_K(column [, k] [, counters]) 73 - k defaults to 1 if omitted (per Snowflake documentation) 74 - counters is optional precision parameter 75 """ 76 # Add default k=1 if only column is provided 77 if len(args) == 1: 78 args.append(exp.Literal.number(1)) 79 80 return exp.ApproxTopK.from_arg_list(args) 81 82 83def _build_date_from_parts(args: t.List) -> exp.DateFromParts: 84 return exp.DateFromParts( 85 year=seq_get(args, 0), 86 month=seq_get(args, 1), 87 day=seq_get(args, 2), 88 allow_overflow=True, 89 ) 90 91 92def _build_datetime( 93 name: str, kind: exp.DataType.Type, safe: bool = False 94) -> t.Callable[[t.List], exp.Func]: 95 def _builder(args: t.List) -> exp.Func: 96 value = seq_get(args, 0) 97 scale_or_fmt = seq_get(args, 1) 98 99 int_value = value is not None and is_int(value.name) 100 int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int 101 102 if isinstance(value, (exp.Literal, exp.Neg)) or (value and scale_or_fmt): 103 # Converts calls like `TO_TIME('01:02:03')` into casts 104 if len(args) == 1 and value.is_string and not int_value: 105 return ( 106 exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True) 107 if safe 108 else exp.cast(value, kind) 109 ) 110 111 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 112 # cases so we can transpile them, since they're relatively common 113 if kind in TIMESTAMP_TYPES: 114 if not safe and (int_scale_or_fmt or (int_value and scale_or_fmt is None)): 115 # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as 116 # it's not easily transpilable. Also, numeric-looking strings with 117 # format strings (e.g., TO_TIMESTAMP('20240115', 'YYYYMMDD')) should 118 # use StrToTime, not UnixToTime. 
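                # Illustrative examples (not in the upstream source): TO_TIMESTAMP(1700000000)
                # takes this UnixToTime branch, while TO_TIMESTAMP('2024-01-15', 'YYYY-MM-DD')
                # takes the StrToTime branch below.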
119 unix_expr = exp.UnixToTime(this=value, scale=scale_or_fmt) 120 unix_expr.set("target_type", exp.DataType.build(kind, dialect="snowflake")) 121 return unix_expr 122 if scale_or_fmt and not int_scale_or_fmt: 123 # Format string provided (e.g., 'YYYY-MM-DD'), use StrToTime 124 strtotime_expr = build_formatted_time(exp.StrToTime, "snowflake")(args) 125 strtotime_expr.set("safe", safe) 126 strtotime_expr.set("target_type", exp.DataType.build(kind, dialect="snowflake")) 127 return strtotime_expr 128 129 # Handle DATE/TIME with format strings - allow int_value if a format string is provided 130 has_format_string = scale_or_fmt and not int_scale_or_fmt 131 if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and ( 132 not int_value or has_format_string 133 ): 134 klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime 135 formatted_exp = build_formatted_time(klass, "snowflake")(args) 136 formatted_exp.set("safe", safe) 137 return formatted_exp 138 139 return exp.Anonymous(this=name, expressions=args) 140 141 return _builder 142 143 144def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 145 expression = parser.build_var_map(args) 146 147 if isinstance(expression, exp.StarMap): 148 return expression 149 150 return exp.Struct( 151 expressions=[ 152 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 153 ] 154 ) 155 156 157def _build_datediff(args: t.List) -> exp.DateDiff: 158 return exp.DateDiff( 159 this=seq_get(args, 2), 160 expression=seq_get(args, 1), 161 unit=map_date_part(seq_get(args, 0)), 162 date_part_boundary=True, 163 ) 164 165 166def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 167 def _builder(args: t.List) -> E: 168 return expr_type( 169 this=seq_get(args, 2), 170 expression=seq_get(args, 1), 171 unit=map_date_part(seq_get(args, 0)), 172 ) 173 174 return _builder 175 176 177def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]: 178 def _builder(args: t.List) -> B | exp.Anonymous: 179 if len(args) == 3: 180 # Special handling for bitwise operations with padside argument 181 if expr_type in (exp.BitwiseAnd, exp.BitwiseOr, exp.BitwiseXor): 182 return expr_type( 183 this=seq_get(args, 0), expression=seq_get(args, 1), padside=seq_get(args, 2) 184 ) 185 return exp.Anonymous(this=name, expressions=args) 186 187 result = binary_from_function(expr_type)(args) 188 189 # Snowflake specifies INT128 for bitwise shifts 190 if expr_type in (exp.BitwiseLeftShift, exp.BitwiseRightShift): 191 result.set("requires_int128", True) 192 193 return result 194 195 return _builder 196 197 198# https://docs.snowflake.com/en/sql-reference/functions/div0 199def _build_if_from_div0(args: t.List) -> exp.If: 200 lhs = exp._wrap(seq_get(args, 0), exp.Binary) 201 rhs = exp._wrap(seq_get(args, 1), exp.Binary) 202 203 cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_( 204 exp.Is(this=lhs, expression=exp.null()).not_() 205 ) 206 true = exp.Literal.number(0) 207 false = exp.Div(this=lhs, expression=rhs) 208 return exp.If(this=cond, true=true, false=false) 209 210 211# https://docs.snowflake.com/en/sql-reference/functions/div0null 212def _build_if_from_div0null(args: t.List) -> exp.If: 213 lhs = exp._wrap(seq_get(args, 0), exp.Binary) 214 rhs = exp._wrap(seq_get(args, 1), exp.Binary) 215 216 # Returns 0 when divisor is 0 OR NULL 217 cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).or_( 218 exp.Is(this=rhs, expression=exp.null()) 219 ) 220 
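    # Illustrative sketch (not in the upstream source): DIV0NULL(a, b) is modeled as
    # IFF(b = 0 OR b IS NULL, 0, a / b), e.g. DIV0NULL(10, 0) -> 0, DIV0NULL(10, NULL) -> 0,
    # DIV0NULL(10, 2) -> 5.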
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


def _build_search(args: t.List) -> exp.Search:
    kwargs = {
        "this": seq_get(args, 0),
        "expression": seq_get(args, 1),
        **{arg.name.lower(): arg for arg in args[2:] if isinstance(arg, exp.Kwarg)},
    }
    return exp.Search(**kwargs)


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _build_regexp_like(args: t.List) -> exp.RegexpLike:
    return exp.RegexpLike(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        flag=seq_get(args, 2),
        full_match=True,
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    unit = map_date_part(trunc.args["unit"])
    trunc.set("unit", unit)
    is_time_input = trunc.this.is_type(exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ)
    if (isinstance(trunc, exp.TimestampTrunc) and is_date_unit(unit) or is_time_input) or (
        isinstance(trunc, exp.DateTrunc) and not is_date_unit(unit)
    ):
        trunc.set("input_type_preserved", True)
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.
298 299 Example: 300 >>> from sqlglot import parse_one 301 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 302 >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake")) 303 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 304 """ 305 if isinstance(expression, exp.Pivot): 306 if expression.unpivot: 307 expression = transforms.unqualify_columns(expression) 308 else: 309 for field in expression.fields: 310 field_expr = seq_get(field.expressions if field else [], 0) 311 312 if isinstance(field_expr, exp.PivotAny): 313 unqualified_field_expr = transforms.unqualify_columns(field_expr) 314 t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0) 315 316 return expression 317 318 319def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 320 assert isinstance(expression, exp.Create) 321 322 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 323 if expression.this in exp.DataType.NESTED_TYPES: 324 expression.set("expressions", None) 325 return expression 326 327 props = expression.args.get("properties") 328 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 329 for schema_expression in expression.this.expressions: 330 if isinstance(schema_expression, exp.ColumnDef): 331 column_type = schema_expression.kind 332 if isinstance(column_type, exp.DataType): 333 column_type.transform(_flatten_structured_type, copy=False) 334 335 return expression 336 337 338def _unnest_generate_date_array(unnest: exp.Unnest) -> None: 339 generate_date_array = unnest.expressions[0] 340 start = generate_date_array.args.get("start") 341 end = generate_date_array.args.get("end") 342 step = generate_date_array.args.get("step") 343 344 if not start or not end or not isinstance(step, exp.Interval) or step.name != "1": 345 return 346 347 unit = step.args.get("unit") 348 349 unnest_alias = unnest.args.get("alias") 350 if unnest_alias: 351 unnest_alias = unnest_alias.copy() 352 sequence_value_name = seq_get(unnest_alias.columns, 0) or "value" 353 else: 354 sequence_value_name = "value" 355 356 # We'll add the next sequence value to the starting date and project the result 357 date_add = _build_date_time_add(exp.DateAdd)( 358 [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")] 359 ) 360 361 # We use DATEDIFF to compute the number of sequence values needed 362 number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"]( 363 [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1] 364 ) 365 366 unnest.set("expressions", [number_sequence]) 367 368 unnest_parent = unnest.parent 369 if isinstance(unnest_parent, exp.Join): 370 select = unnest_parent.parent 371 if isinstance(select, exp.Select): 372 replace_column_name = ( 373 sequence_value_name 374 if isinstance(sequence_value_name, str) 375 else sequence_value_name.name 376 ) 377 378 scope = build_scope(select) 379 if scope: 380 for column in scope.columns: 381 if column.name.lower() == replace_column_name.lower(): 382 column.replace( 383 date_add.as_(replace_column_name) 384 if isinstance(column.parent, exp.Select) 385 else date_add 386 ) 387 388 lateral = exp.Lateral(this=unnest_parent.this.pop()) 389 unnest_parent.replace(exp.Join(this=lateral)) 390 else: 391 unnest.replace( 392 exp.select(date_add.as_(sequence_value_name)) 393 .from_(unnest.copy()) 394 .subquery(unnest_alias) 395 ) 396 397 398def _transform_generate_date_array(expression: 
exp.Expression) -> exp.Expression: 399 if isinstance(expression, exp.Select): 400 for generate_date_array in expression.find_all(exp.GenerateDateArray): 401 parent = generate_date_array.parent 402 403 # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake 404 # query is the following (it'll be unnested properly on the next iteration due to copy): 405 # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...)))) 406 if not isinstance(parent, exp.Unnest): 407 unnest = exp.Unnest(expressions=[generate_date_array.copy()]) 408 generate_date_array.replace( 409 exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery() 410 ) 411 412 if ( 413 isinstance(parent, exp.Unnest) 414 and isinstance(parent.parent, (exp.From, exp.Join)) 415 and len(parent.expressions) == 1 416 ): 417 _unnest_generate_date_array(parent) 418 419 return expression 420 421 422def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Snowflake], E]: 423 def _builder(args: t.List, dialect: Snowflake) -> E: 424 return expr_type( 425 this=seq_get(args, 0), 426 expression=seq_get(args, 1), 427 position=seq_get(args, 2), 428 occurrence=seq_get(args, 3), 429 parameters=seq_get(args, 4), 430 group=seq_get(args, 5) or exp.Literal.number(0), 431 **( 432 {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL} 433 if expr_type is exp.RegexpExtract 434 else {} 435 ), 436 ) 437 438 return _builder 439 440 441def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 442 # Other dialects don't support all of the following parameters, so we need to 443 # generate default values as necessary to ensure the transpilation is correct 444 group = expression.args.get("group") 445 446 # To avoid generating all these default values, we set group to None if 447 # it's 0 (also default value) which doesn't trigger the following chain 448 if group and group.name == "0": 449 group = None 450 451 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 452 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 453 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 454 455 return self.func( 456 "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_SUBSTR_ALL", 457 expression.this, 458 expression.expression, 459 position, 460 occurrence, 461 parameters, 462 group, 463 ) 464 465 466def _json_extract_value_array_sql( 467 self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 468) -> str: 469 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 470 ident = exp.to_identifier("x") 471 472 if isinstance(expression, exp.JSONValueArray): 473 this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR) 474 else: 475 this = exp.ParseJSON(this=f"TO_JSON({ident})") 476 477 transform_lambda = exp.Lambda(expressions=[ident], this=this) 478 479 return self.func("TRANSFORM", json_extract, transform_lambda) 480 481 482def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression: 483 if isinstance(expression, exp.Select): 484 scope = build_scope(expression) 485 if not scope: 486 return expression 487 488 unnests = list(scope.find_all(exp.Unnest)) 489 490 if not unnests: 491 return expression 492 493 taken_source_names = set(scope.sources) 494 column_source: t.Dict[str, exp.Identifier] = {} 495 
unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {} 496 497 unnest_identifier: t.Optional[exp.Identifier] = None 498 orig_expression = expression.copy() 499 500 for unnest in unnests: 501 if not isinstance(unnest.parent, (exp.From, exp.Join)): 502 continue 503 504 # Try to infer column names produced by an unnest operator. This is only possible 505 # when we can peek into the (statically known) contents of the unnested value. 506 unnest_columns: t.Set[str] = set() 507 for unnest_expr in unnest.expressions: 508 if not isinstance(unnest_expr, exp.Array): 509 continue 510 511 for array_expr in unnest_expr.expressions: 512 if not ( 513 isinstance(array_expr, exp.Struct) 514 and array_expr.expressions 515 and all( 516 isinstance(struct_expr, exp.PropertyEQ) 517 for struct_expr in array_expr.expressions 518 ) 519 ): 520 continue 521 522 unnest_columns.update( 523 struct_expr.this.name.lower() for struct_expr in array_expr.expressions 524 ) 525 break 526 527 if unnest_columns: 528 break 529 530 unnest_alias = unnest.args.get("alias") 531 if not unnest_alias: 532 alias_name = find_new_name(taken_source_names, "value") 533 taken_source_names.add(alias_name) 534 535 # Produce a `TableAlias` AST similar to what is produced for BigQuery. This 536 # will be corrected later, when we generate SQL for the `Unnest` AST node. 537 aliased_unnest = exp.alias_(unnest, None, table=[alias_name]) 538 scope.replace(unnest, aliased_unnest) 539 540 unnest_identifier = aliased_unnest.args["alias"].columns[0] 541 else: 542 alias_columns = getattr(unnest_alias, "columns", []) 543 unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0) 544 545 if not isinstance(unnest_identifier, exp.Identifier): 546 return orig_expression 547 548 unnest_to_identifier[unnest] = unnest_identifier 549 column_source.update({c.lower(): unnest_identifier for c in unnest_columns}) 550 551 for column in scope.columns: 552 if column.table: 553 continue 554 555 table = column_source.get(column.name.lower()) 556 if ( 557 unnest_identifier 558 and not table 559 and len(scope.sources) == 1 560 and column.name.lower() != unnest_identifier.name.lower() 561 ): 562 unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select) 563 ancestor_identifier = unnest_to_identifier.get(unnest_ancestor) 564 if ( 565 isinstance(unnest_ancestor, exp.Unnest) 566 and ancestor_identifier 567 and ancestor_identifier.name.lower() == unnest_identifier.name.lower() 568 ): 569 continue 570 571 table = unnest_identifier 572 573 column.set("table", table and table.copy()) 574 575 return expression 576 577 578def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression: 579 if isinstance(expression, exp.Select): 580 # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations 581 # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced 582 # by Snowflake's parser. 583 # 584 # Additionally, at the time of writing this, BigQuery is the only dialect that produces a 585 # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`. 
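        # Illustrative example (not in the upstream source): for a BigQuery-style
        # `... FROM UNNEST(arr) AS item`, a projection like `item.name` is rewritten
        # below to `item['name']` (aliased so the projection keeps its original name).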
586 unnest_aliases = set() 587 for unnest in find_all_in_scope(expression, exp.Unnest): 588 unnest_alias = unnest.args.get("alias") 589 if ( 590 isinstance(unnest_alias, exp.TableAlias) 591 and not unnest_alias.this 592 and len(unnest_alias.columns) == 1 593 ): 594 unnest_aliases.add(unnest_alias.columns[0].name) 595 596 if unnest_aliases: 597 for c in find_all_in_scope(expression, exp.Column): 598 if c.table in unnest_aliases: 599 bracket_lhs = c.args["table"] 600 bracket_rhs = exp.Literal.string(c.name) 601 bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs]) 602 603 if c.parent is expression: 604 # Retain column projection names by using aliases 605 c.replace(exp.alias_(bracket, c.this.copy())) 606 else: 607 c.replace(bracket) 608 609 return expression 610 611 612def _build_timestamp_from_parts(args: t.List) -> exp.Func: 613 """Build TimestampFromParts with support for both syntaxes: 614 1. TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second [, nanosecond] [, time_zone]) 615 2. TIMESTAMP_FROM_PARTS(date_expr, time_expr) - Snowflake specific 616 """ 617 if len(args) == 2: 618 return exp.TimestampFromParts(this=seq_get(args, 0), expression=seq_get(args, 1)) 619 620 return exp.TimestampFromParts.from_arg_list(args) 621 622 623def _build_round(args: t.List) -> exp.Round: 624 """ 625 Build Round expression, unwrapping Snowflake's named parameters. 626 627 Maps EXPR => this, SCALE => decimals, ROUNDING_MODE => truncate. 628 629 Note: Snowflake does not support mixing named and positional arguments. 630 Arguments are either all named or all positional. 631 """ 632 kwarg_map = {"EXPR": "this", "SCALE": "decimals", "ROUNDING_MODE": "truncate"} 633 round_args = {} 634 positional_keys = ["this", "decimals", "truncate"] 635 positional_idx = 0 636 637 for arg in args: 638 if isinstance(arg, exp.Kwarg): 639 key = arg.this.name.upper() 640 round_key = kwarg_map.get(key) 641 if round_key: 642 round_args[round_key] = arg.expression 643 else: 644 if positional_idx < len(positional_keys): 645 round_args[positional_keys[positional_idx]] = arg 646 positional_idx += 1 647 648 expression = exp.Round(**round_args) 649 expression.set("casts_non_integer_decimals", True) 650 return expression 651 652 653def _build_generator(args: t.List) -> exp.Generator: 654 """ 655 Build Generator expression, unwrapping Snowflake's named parameters. 656 657 Maps ROWCOUNT => rowcount, TIMELIMIT => time_limit. 
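
    Illustrative example (not in the upstream source, assuming Snowflake's named-argument
    syntax): GENERATOR(ROWCOUNT => 4) is parsed into exp.Generator(rowcount=4), and
    GENERATOR(TIMELIMIT => 60) into exp.Generator(time_limit=60).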
    """
    kwarg_map = {"ROWCOUNT": "rowcount", "TIMELIMIT": "time_limit"}
    gen_args = {}

    for arg in args:
        if isinstance(arg, exp.Kwarg):
            key = arg.this.name.upper()
            gen_key = kwarg_map.get(key)
            if gen_key:
                gen_args[gen_key] = arg.expression

    return exp.Generator(**gen_args)


def _build_try_to_number(args: t.List[exp.Expression]) -> exp.Expression:
    return exp.ToNumber(
        this=seq_get(args, 0),
        format=seq_get(args, 1),
        precision=seq_get(args, 2),
        scale=seq_get(args, 3),
        safe=True,
    )


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ARRAY_FUNCS_PROPAGATES_NULLS = True
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        "T": "T",  # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_ internal representation with precision specified
        "ff": "%f_nine",
        "FF0": "%f_zero",
        "ff0": "%f_zero",
        "FF1": "%f_one",
        "ff1": "%f_one",
        "FF2": "%f_two",
        "ff2": "%f_two",
        "FF3": "%f_three",
        "ff3": "%f_three",
        "FF4": "%f_four",
        "ff4": "%f_four",
        "FF5": "%f_five",
        "ff5": "%f_five",
        "FF6": "%f",
        "ff6": "%f",
        "FF7": "%f_seven",
        "ff7": "%f_seven",
        "FF8": "%f_eight",
        "ff8": "%f_eight",
        "FF9": "%f_nine",
        "ff9": "%f_nine",
        "TZHTZM": "%z",
        "tzhtzm": "%z",
        "TZH:TZM": "%:z",  # internal representation for ±HH:MM
        "tzh:tzm": "%:z",
        "TZH": "%-z",  # internal representation ±HH
        "tzh": "%-z",
        '"T"': "T",  # remove the optional double quotes around the separator between the date and time
        # Seems like Snowflake treats AM/PM in the format string as equivalent,
        # only the time (stamp) value's AM/PM affects the output
        "AM": "%p",
        "am": "%p",
        "PM": "%p",
        "pm": "%p",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
        # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve
        # EPOCH_SECOND as a distinct value for two reasons:
        # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE
        # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND
        #    to BIGINT to match Snowflake's integer behavior
        # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized
        # to EXTRACT(EPOCH FROM ts) and lose the integer semantics.
        "EPOCH_SECOND": "EPOCH_SECOND",
        "EPOCH_SECONDS": "EPOCH_SECOND",
    }

    PSEUDOCOLUMNS = {"LEVEL"}

    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        return super().can_quote(identifier, identify) and not (
            isinstance(identifier.parent, exp.Table)
            and not identifier.quoted
            and identifier.name.lower() == "dual"
        )

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_TIME: exp.Localtime,
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.AddMonths(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                preserve_end_of_month=True,
            ),
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)),
            "APPROX_TOP_K": _build_approx_top_k,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1),
                expression=seq_get(args, 0),
                ensure_variant=False,
                check_null=True,
            ),
            "ARRAY_DISTINCT": lambda args: exp.ArrayDistinct(
                this=seq_get(args, 0),
                check_null=True,
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "ARRAY_FLATTEN": exp.Flatten.from_arg_list,
            "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
            "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
            "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
"BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 862 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 863 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 864 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 865 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 866 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 867 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 868 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 869 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 870 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 871 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 872 "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list, 873 "BOOLAND": lambda args: exp.Booland( 874 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 875 ), 876 "BOOLOR": lambda args: exp.Boolor( 877 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 878 ), 879 "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True), 880 "BOOLXOR": lambda args: exp.Xor( 881 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 882 ), 883 "CORR": lambda args: exp.Corr( 884 this=seq_get(args, 0), 885 expression=seq_get(args, 1), 886 null_on_zero_variance=True, 887 ), 888 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 889 "DATEFROMPARTS": _build_date_from_parts, 890 "DATE_FROM_PARTS": _build_date_from_parts, 891 "DATE_TRUNC": _date_trunc_to_time, 892 "DATEADD": _build_date_time_add(exp.DateAdd), 893 "DATEDIFF": _build_datediff, 894 "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True), 895 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 896 "DIV0": _build_if_from_div0, 897 "DIV0NULL": _build_if_from_div0null, 898 "EDITDISTANCE": lambda args: exp.Levenshtein( 899 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 900 ), 901 "FLATTEN": exp.Explode.from_arg_list, 902 "GENERATOR": _build_generator, 903 "GET": exp.GetExtract.from_arg_list, 904 "GETDATE": exp.CurrentTimestamp.from_arg_list, 905 "GET_PATH": lambda args, dialect: exp.JSONExtract( 906 this=seq_get(args, 0), 907 expression=dialect.to_json_path(seq_get(args, 1)), 908 requires_json=True, 909 ), 910 "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest( 911 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 912 ), 913 "LEAST_IGNORE_NULLS": lambda args: exp.Least( 914 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 915 ), 916 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 917 "IFF": exp.If.from_arg_list, 918 "JAROWINKLER_SIMILARITY": lambda args: exp.JarowinklerSimilarity( 919 this=seq_get(args, 0), 920 expression=seq_get(args, 1), 921 case_insensitive=True, 922 ), 923 "MD5_HEX": exp.MD5.from_arg_list, 924 "MD5_BINARY": exp.MD5Digest.from_arg_list, 925 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 926 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 927 "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True), 928 "LAST_DAY": lambda args: exp.LastDay( 929 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 930 ), 931 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 932 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 933 "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 934 "NULLIFZERO": _build_if_from_nullifzero, 935 "OBJECT_CONSTRUCT": _build_object_construct, 936 "OBJECT_KEYS": exp.JSONKeys.from_arg_list, 937 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 938 "PARSE_URL": lambda args: exp.ParseUrl( 939 this=seq_get(args, 0), permissive=seq_get(args, 1) 940 ), 941 
"REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 942 "REGEXP_LIKE": _build_regexp_like, 943 "REGEXP_REPLACE": _build_regexp_replace, 944 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 945 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 946 "REPLACE": build_replace_with_optional_replacement, 947 "RLIKE": _build_regexp_like, 948 "ROUND": _build_round, 949 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 950 "SHA1_HEX": exp.SHA.from_arg_list, 951 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 952 "SHA2_HEX": exp.SHA2.from_arg_list, 953 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 954 "STDDEV_SAMP": exp.Stddev.from_arg_list, 955 "STRTOK": _build_strtok, 956 "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True), 957 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 958 "TIMEADD": _build_date_time_add(exp.TimeAdd), 959 "TIMEDIFF": _build_datediff, 960 "TIME_FROM_PARTS": lambda args: exp.TimeFromParts( 961 hour=seq_get(args, 0), 962 min=seq_get(args, 1), 963 sec=seq_get(args, 2), 964 nano=seq_get(args, 3), 965 overflow=True, 966 ), 967 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 968 "TIMESTAMPDIFF": _build_datediff, 969 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 970 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 971 "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts, 972 "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts, 973 "TRUNC": lambda args, dialect: build_trunc( 974 args, dialect, date_trunc_requires_part=False 975 ), 976 "TRUNCATE": lambda args, dialect: build_trunc( 977 args, dialect, date_trunc_requires_part=False 978 ), 979 "TRY_DECRYPT": lambda args: exp.Decrypt( 980 this=seq_get(args, 0), 981 passphrase=seq_get(args, 1), 982 aad=seq_get(args, 2), 983 encryption_method=seq_get(args, 3), 984 safe=True, 985 ), 986 "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw( 987 this=seq_get(args, 0), 988 key=seq_get(args, 1), 989 iv=seq_get(args, 2), 990 aad=seq_get(args, 3), 991 encryption_method=seq_get(args, 4), 992 aead=seq_get(args, 5), 993 safe=True, 994 ), 995 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 996 "TRY_TO_BINARY": lambda args: exp.ToBinary( 997 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 998 ), 999 "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True), 1000 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 1001 **dict.fromkeys( 1002 ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number 1003 ), 1004 "TRY_TO_DOUBLE": lambda args: exp.ToDouble( 1005 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 1006 ), 1007 "TRY_TO_FILE": lambda args: exp.ToFile( 1008 this=seq_get(args, 0), path=seq_get(args, 1), safe=True 1009 ), 1010 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 1011 "TRY_TO_TIMESTAMP": _build_datetime( 1012 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 1013 ), 1014 "TRY_TO_TIMESTAMP_LTZ": _build_datetime( 1015 "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True 1016 ), 1017 "TRY_TO_TIMESTAMP_NTZ": _build_datetime( 1018 "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True 1019 ), 1020 "TRY_TO_TIMESTAMP_TZ": _build_datetime( 1021 "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True 1022 ), 1023 "TO_CHAR": build_timetostr_or_tochar, 1024 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 1025 
**dict.fromkeys( 1026 ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"), 1027 lambda args: exp.ToNumber( 1028 this=seq_get(args, 0), 1029 format=seq_get(args, 1), 1030 precision=seq_get(args, 2), 1031 scale=seq_get(args, 3), 1032 ), 1033 ), 1034 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 1035 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 1036 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 1037 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ), 1038 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 1039 "TO_VARCHAR": build_timetostr_or_tochar, 1040 "TO_JSON": exp.JSONFormat.from_arg_list, 1041 "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list, 1042 "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list, 1043 "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list, 1044 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 1045 "ZEROIFNULL": _build_if_from_zeroifnull, 1046 "LIKE": build_like(exp.Like), 1047 "ILIKE": build_like(exp.ILike), 1048 "SEARCH": _build_search, 1049 "SKEW": exp.Skewness.from_arg_list, 1050 "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 1051 "WEEKISO": exp.WeekOfYear.from_arg_list, 1052 "WEEKOFYEAR": exp.Week.from_arg_list, 1053 } 1054 FUNCTIONS.pop("PREDICT") 1055 1056 FUNCTION_PARSERS = { 1057 **parser.Parser.FUNCTION_PARSERS, 1058 "DATE_PART": lambda self: self._parse_date_part(), 1059 "DIRECTORY": lambda self: self._parse_directory(), 1060 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 1061 "LISTAGG": lambda self: self._parse_string_agg(), 1062 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 1063 } 1064 FUNCTION_PARSERS.pop("TRIM") 1065 1066 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 1067 1068 ALTER_PARSERS = { 1069 **parser.Parser.ALTER_PARSERS, 1070 "MODIFY": lambda self: self._parse_alter_table_alter(), 1071 "SESSION": lambda self: self._parse_alter_session(), 1072 "UNSET": lambda self: self.expression( 1073 exp.Set, 1074 tag=self._match_text_seq("TAG"), 1075 expressions=self._parse_csv(self._parse_id_var), 1076 unset=True, 1077 ), 1078 } 1079 1080 STATEMENT_PARSERS = { 1081 **parser.Parser.STATEMENT_PARSERS, 1082 TokenType.GET: lambda self: self._parse_get(), 1083 TokenType.PUT: lambda self: self._parse_put(), 1084 TokenType.SHOW: lambda self: self._parse_show(), 1085 } 1086 1087 PROPERTY_PARSERS = { 1088 **parser.Parser.PROPERTY_PARSERS, 1089 "CREDENTIALS": lambda self: self._parse_credentials_property(), 1090 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 1091 "LOCATION": lambda self: self._parse_location_property(), 1092 "TAG": lambda self: self._parse_tag(), 1093 "USING": lambda self: self._match_text_seq("TEMPLATE") 1094 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 1095 } 1096 1097 TYPE_CONVERTERS = { 1098 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 1099 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 1100 } 1101 1102 SHOW_PARSERS = { 1103 "DATABASES": _show_parser("DATABASES"), 1104 "TERSE DATABASES": _show_parser("DATABASES"), 1105 "SCHEMAS": _show_parser("SCHEMAS"), 1106 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 1107 "OBJECTS": _show_parser("OBJECTS"), 1108 "TERSE OBJECTS": _show_parser("OBJECTS"), 1109 "TABLES": _show_parser("TABLES"), 1110 "TERSE TABLES": _show_parser("TABLES"), 1111 "VIEWS": _show_parser("VIEWS"), 1112 
"TERSE VIEWS": _show_parser("VIEWS"), 1113 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1114 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1115 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1116 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1117 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1118 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1119 "SEQUENCES": _show_parser("SEQUENCES"), 1120 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 1121 "STAGES": _show_parser("STAGES"), 1122 "COLUMNS": _show_parser("COLUMNS"), 1123 "USERS": _show_parser("USERS"), 1124 "TERSE USERS": _show_parser("USERS"), 1125 "FILE FORMATS": _show_parser("FILE FORMATS"), 1126 "FUNCTIONS": _show_parser("FUNCTIONS"), 1127 "PROCEDURES": _show_parser("PROCEDURES"), 1128 "WAREHOUSES": _show_parser("WAREHOUSES"), 1129 } 1130 1131 CONSTRAINT_PARSERS = { 1132 **parser.Parser.CONSTRAINT_PARSERS, 1133 "WITH": lambda self: self._parse_with_constraint(), 1134 "MASKING": lambda self: self._parse_with_constraint(), 1135 "PROJECTION": lambda self: self._parse_with_constraint(), 1136 "TAG": lambda self: self._parse_with_constraint(), 1137 } 1138 1139 STAGED_FILE_SINGLE_TOKENS = { 1140 TokenType.DOT, 1141 TokenType.MOD, 1142 TokenType.SLASH, 1143 } 1144 1145 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 1146 1147 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 1148 1149 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 1150 1151 LAMBDAS = { 1152 **parser.Parser.LAMBDAS, 1153 TokenType.ARROW: lambda self, expressions: self.expression( 1154 exp.Lambda, 1155 this=self._replace_lambda( 1156 self._parse_assignment(), 1157 expressions, 1158 ), 1159 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 1160 ), 1161 } 1162 1163 COLUMN_OPERATORS = { 1164 **parser.Parser.COLUMN_OPERATORS, 1165 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 1166 exp.ModelAttribute, this=this, expression=attr 1167 ), 1168 } 1169 1170 def _parse_directory(self) -> exp.DirectoryStage: 1171 table = self._parse_table_parts() 1172 1173 if isinstance(table, exp.Table): 1174 table = table.this 1175 1176 return self.expression(exp.DirectoryStage, this=table) 1177 1178 def _parse_use(self) -> exp.Use: 1179 if self._match_text_seq("SECONDARY", "ROLES"): 1180 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 1181 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 1182 return self.expression( 1183 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 1184 ) 1185 1186 return super()._parse_use() 1187 1188 def _negate_range( 1189 self, this: t.Optional[exp.Expression] = None 1190 ) -> t.Optional[exp.Expression]: 1191 if not this: 1192 return this 1193 1194 query = this.args.get("query") 1195 if isinstance(this, exp.In) and isinstance(query, exp.Query): 1196 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 1197 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 1198 # which can produce different results (most likely a SnowFlake bug). 
1199 # 1200 # https://docs.snowflake.com/en/sql-reference/functions/in 1201 # Context: https://github.com/tobymao/sqlglot/issues/3890 1202 return self.expression( 1203 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 1204 ) 1205 1206 return self.expression(exp.Not, this=this) 1207 1208 def _parse_tag(self) -> exp.Tags: 1209 return self.expression( 1210 exp.Tags, 1211 expressions=self._parse_wrapped_csv(self._parse_property), 1212 ) 1213 1214 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 1215 if self._prev.token_type != TokenType.WITH: 1216 self._retreat(self._index - 1) 1217 1218 if self._match_text_seq("MASKING", "POLICY"): 1219 policy = self._parse_column() 1220 return self.expression( 1221 exp.MaskingPolicyColumnConstraint, 1222 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1223 expressions=self._match(TokenType.USING) 1224 and self._parse_wrapped_csv(self._parse_id_var), 1225 ) 1226 if self._match_text_seq("PROJECTION", "POLICY"): 1227 policy = self._parse_column() 1228 return self.expression( 1229 exp.ProjectionPolicyColumnConstraint, 1230 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1231 ) 1232 if self._match(TokenType.TAG): 1233 return self._parse_tag() 1234 1235 return None 1236 1237 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1238 if self._match(TokenType.TAG): 1239 return self._parse_tag() 1240 1241 return super()._parse_with_property() 1242 1243 def _parse_create(self) -> exp.Create | exp.Command: 1244 expression = super()._parse_create() 1245 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 1246 # Replace the Table node with the enclosed Identifier 1247 expression.this.replace(expression.this.this) 1248 1249 return expression 1250 1251 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 1252 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 1253 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 1254 this = self._parse_var() or self._parse_type() 1255 1256 if not this: 1257 return None 1258 1259 # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr) 1260 expression = ( 1261 self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise() 1262 ) 1263 return self.expression( 1264 exp.Extract, this=map_date_part(this, self.dialect), expression=expression 1265 ) 1266 1267 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1268 if is_map: 1269 # Keys are strings in Snowflake's objects, see also: 1270 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1271 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1272 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1273 1274 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1275 1276 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1277 lateral = super()._parse_lateral() 1278 if not lateral: 1279 return lateral 1280 1281 if isinstance(lateral.this, exp.Explode): 1282 table_alias = lateral.args.get("alias") 1283 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1284 if table_alias and not table_alias.args.get("columns"): 1285 table_alias.set("columns", columns) 1286 elif not table_alias: 1287 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1288 1289 return lateral 1290 1291 def 
_parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in
self.SCHEMA_KINDS else "TABLE" 1397 scope = self._parse_table_parts() 1398 1399 return self.expression( 1400 exp.Show, 1401 terse=terse, 1402 this=this, 1403 history=history, 1404 like=like, 1405 scope=scope, 1406 scope_kind=scope_kind, 1407 starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1408 limit=self._parse_limit(), 1409 from_=self._parse_string() if self._match(TokenType.FROM) else None, 1410 privileges=self._match_text_seq("WITH", "PRIVILEGES") 1411 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1412 ) 1413 1414 def _parse_put(self) -> exp.Put | exp.Command: 1415 if self._curr.token_type != TokenType.STRING: 1416 return self._parse_as_command(self._prev) 1417 1418 return self.expression( 1419 exp.Put, 1420 this=self._parse_string(), 1421 target=self._parse_location_path(), 1422 properties=self._parse_properties(), 1423 ) 1424 1425 def _parse_get(self) -> t.Optional[exp.Expression]: 1426 start = self._prev 1427 1428 # If we detect GET( then we need to parse a function, not a statement 1429 if self._match(TokenType.L_PAREN): 1430 self._retreat(self._index - 2) 1431 return self._parse_expression() 1432 1433 target = self._parse_location_path() 1434 1435 # Parse as command if unquoted file path 1436 if self._curr.token_type == TokenType.URI_START: 1437 return self._parse_as_command(start) 1438 1439 return self.expression( 1440 exp.Get, 1441 this=self._parse_string(), 1442 target=target, 1443 properties=self._parse_properties(), 1444 ) 1445 1446 def _parse_location_property(self) -> exp.LocationProperty: 1447 self._match(TokenType.EQ) 1448 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1449 1450 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1451 # Parse either a subquery or a staged file 1452 return ( 1453 self._parse_select(table=True, parse_subquery_alias=False) 1454 if self._match(TokenType.L_PAREN, advance=False) 1455 else self._parse_table_parts() 1456 ) 1457 1458 def _parse_location_path(self) -> exp.Var: 1459 start = self._curr 1460 self._advance_any(ignore_reserved=True) 1461 1462 # We avoid consuming a comma token because external tables like @foo and @bar 1463 # can be joined in a query with a comma separator, as well as closing paren 1464 # in case of subqueries 1465 while self._is_connected() and not self._match_set( 1466 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1467 ): 1468 self._advance_any(ignore_reserved=True) 1469 1470 return exp.var(self._find_sql(start, self._prev)) 1471 1472 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1473 this = super()._parse_lambda_arg() 1474 1475 if not this: 1476 return this 1477 1478 typ = self._parse_types() 1479 1480 if typ: 1481 return self.expression(exp.Cast, this=this, to=typ) 1482 1483 return this 1484 1485 def _parse_foreign_key(self) -> exp.ForeignKey: 1486 # inlineFK, the REFERENCES columns are implied 1487 if self._match(TokenType.REFERENCES, advance=False): 1488 return self.expression(exp.ForeignKey) 1489 1490 # outoflineFK, explicitly names the columns 1491 return super()._parse_foreign_key() 1492 1493 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1494 self._match(TokenType.EQ) 1495 if self._match(TokenType.L_PAREN, advance=False): 1496 expressions = self._parse_wrapped_options() 1497 else: 1498 expressions = [self._parse_format_name()] 1499 1500 return self.expression( 1501 exp.FileFormatProperty, 1502 expressions=expressions, 1503 ) 1504 1505 def 
_parse_credentials_property(self) -> exp.CredentialsProperty: 1506 return self.expression( 1507 exp.CredentialsProperty, 1508 expressions=self._parse_wrapped_options(), 1509 ) 1510 1511 def _parse_semantic_view(self) -> exp.SemanticView: 1512 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1513 1514 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1515 if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")): 1516 keyword = self._prev.text.lower() 1517 kwargs[keyword] = self._parse_csv( 1518 lambda: self._parse_alias(self._parse_disjunction(), explicit=True) 1519 ) 1520 elif self._match_text_seq("WHERE"): 1521 kwargs["where"] = self._parse_expression() 1522 else: 1523 self.raise_error("Expecting ) or encountered unexpected keyword") 1524 break 1525 1526 return self.expression(exp.SemanticView, **kwargs) 1527 1528 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 1529 set = super()._parse_set(unset=unset, tag=tag) 1530 1531 if isinstance(set, exp.Set): 1532 for expr in set.expressions: 1533 if isinstance(expr, exp.SetItem): 1534 expr.set("kind", "VARIABLE") 1535 return set 1536 1537 def _parse_window( 1538 self, this: t.Optional[exp.Expression], alias: bool = False 1539 ) -> t.Optional[exp.Expression]: 1540 if isinstance(this, exp.NthValue): 1541 if self._match_text_seq("FROM"): 1542 if self._match_texts(("FIRST", "LAST")): 1543 from_first = self._prev.text.upper() == "FIRST" 1544 this.set("from_first", from_first) 1545 1546 result = super()._parse_window(this, alias) 1547 1548 return result 1549 1550 class Tokenizer(tokens.Tokenizer): 1551 STRING_ESCAPES = ["\\", "'"] 1552 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1553 RAW_STRINGS = ["$$"] 1554 COMMENTS = ["--", "//", ("/*", "*/")] 1555 NESTED_COMMENTS = False 1556 1557 KEYWORDS = { 1558 **tokens.Tokenizer.KEYWORDS, 1559 "BYTEINT": TokenType.INT, 1560 "FILE://": TokenType.URI_START, 1561 "FILE FORMAT": TokenType.FILE_FORMAT, 1562 "GET": TokenType.GET, 1563 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1564 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1565 "MINUS": TokenType.EXCEPT, 1566 "NCHAR VARYING": TokenType.VARCHAR, 1567 "PUT": TokenType.PUT, 1568 "REMOVE": TokenType.COMMAND, 1569 "RM": TokenType.COMMAND, 1570 "SAMPLE": TokenType.TABLE_SAMPLE, 1571 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1572 "SQL_DOUBLE": TokenType.DOUBLE, 1573 "SQL_VARCHAR": TokenType.VARCHAR, 1574 "STAGE": TokenType.STAGE, 1575 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1576 "STREAMLIT": TokenType.STREAMLIT, 1577 "TAG": TokenType.TAG, 1578 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1579 "TOP": TokenType.TOP, 1580 "WAREHOUSE": TokenType.WAREHOUSE, 1581 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float 1582 # FLOAT is a synonym for DOUBLE in Snowflake 1583 "FLOAT": TokenType.DOUBLE, 1584 } 1585 KEYWORDS.pop("/*+") 1586 1587 SINGLE_TOKENS = { 1588 **tokens.Tokenizer.SINGLE_TOKENS, 1589 "$": TokenType.PARAMETER, 1590 "!": TokenType.EXCLAMATION, 1591 } 1592 1593 VAR_SINGLE_TOKENS = {"$"} 1594 1595 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1596 1597 class Generator(generator.Generator): 1598 PARAMETER_TOKEN = "$" 1599 MATCHED_BY_SOURCE = False 1600 SINGLE_STRING_INTERVAL = True 1601 JOIN_HINTS = False 1602 TABLE_HINTS = False 1603 QUERY_HINTS = False 1604 AGGREGATE_FILTER_SUPPORTED = False 1605 SUPPORTS_TABLE_COPY = False 1606 COLLATE_IS_FUNC = True 1607 LIMIT_ONLY_LITERALS = True 1608 JSON_KEY_VALUE_PAIR_SEP = "," 1609 INSERT_OVERWRITE = " OVERWRITE 
INTO" 1610 STRUCT_DELIMITER = ("(", ")") 1611 COPY_PARAMS_ARE_WRAPPED = False 1612 COPY_PARAMS_EQ_REQUIRED = True 1613 STAR_EXCEPT = "EXCLUDE" 1614 SUPPORTS_EXPLODING_PROJECTIONS = False 1615 ARRAY_CONCAT_IS_VAR_LEN = False 1616 SUPPORTS_CONVERT_TIMEZONE = True 1617 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1618 SUPPORTS_MEDIAN = True 1619 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1620 SUPPORTS_DECODE_CASE = True 1621 IS_BOOL_ALLOWED = False 1622 DIRECTED_JOINS = True 1623 1624 TRANSFORMS = { 1625 **generator.Generator.TRANSFORMS, 1626 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1627 exp.ArgMax: rename_func("MAX_BY"), 1628 exp.ArgMin: rename_func("MIN_BY"), 1629 exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]), 1630 exp.ArrayConcat: array_concat_sql("ARRAY_CAT"), 1631 exp.ArrayAppend: array_append_sql("ARRAY_APPEND"), 1632 exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"), 1633 exp.ArrayContains: lambda self, e: self.func( 1634 "ARRAY_CONTAINS", 1635 e.expression 1636 if e.args.get("ensure_variant") is False 1637 else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False), 1638 e.this, 1639 ), 1640 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1641 exp.AtTimeZone: lambda self, e: self.func( 1642 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1643 ), 1644 exp.BitwiseOr: rename_func("BITOR"), 1645 exp.BitwiseXor: rename_func("BITXOR"), 1646 exp.BitwiseAnd: rename_func("BITAND"), 1647 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1648 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1649 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1650 exp.BitwiseNot: rename_func("BITNOT"), 1651 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1652 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1653 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1654 exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE") 1655 if e.args.get("sysdate") 1656 else self.function_fallback_sql(e), 1657 exp.CurrentSchemas: lambda self, e: self.func("CURRENT_SCHEMAS"), 1658 exp.Localtime: lambda self, e: self.func("CURRENT_TIME", e.this) 1659 if e.this 1660 else "CURRENT_TIME", 1661 exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this) 1662 if e.this 1663 else "CURRENT_TIMESTAMP", 1664 exp.DateAdd: date_delta_sql("DATEADD"), 1665 exp.DateDiff: date_delta_sql("DATEDIFF"), 1666 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1667 exp.DatetimeDiff: timestampdiff_sql, 1668 exp.DateStrToDate: datestrtodate_sql, 1669 exp.Decrypt: lambda self, e: self.func( 1670 f"{'TRY_' if e.args.get('safe') else ''}DECRYPT", 1671 e.this, 1672 e.args.get("passphrase"), 1673 e.args.get("aad"), 1674 e.args.get("encryption_method"), 1675 ), 1676 exp.DecryptRaw: lambda self, e: self.func( 1677 f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW", 1678 e.this, 1679 e.args.get("key"), 1680 e.args.get("iv"), 1681 e.args.get("aad"), 1682 e.args.get("encryption_method"), 1683 e.args.get("aead"), 1684 ), 1685 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1686 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1687 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1688 exp.DayOfYear: rename_func("DAYOFYEAR"), 1689 exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"), 1690 exp.Explode: rename_func("FLATTEN"), 1691 exp.Extract: lambda self, e: self.func( 1692 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1693 ), 1694 exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"), 1695 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1696 
exp.FileFormatProperty: lambda self, 1697 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1698 exp.FromTimeZone: lambda self, e: self.func( 1699 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1700 ), 1701 exp.GenerateSeries: lambda self, e: self.func( 1702 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1703 ), 1704 exp.GetExtract: rename_func("GET"), 1705 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1706 exp.If: if_sql(name="IFF", false_value="NULL"), 1707 exp.JSONExtractArray: _json_extract_value_array_sql, 1708 exp.JSONExtractScalar: lambda self, e: self.func( 1709 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1710 ), 1711 exp.JSONKeys: rename_func("OBJECT_KEYS"), 1712 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1713 exp.JSONPathRoot: lambda *_: "", 1714 exp.JSONValueArray: _json_extract_value_array_sql, 1715 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1716 rename_func("EDITDISTANCE") 1717 ), 1718 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1719 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1720 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1721 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1722 exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"), 1723 exp.MakeInterval: no_make_interval_sql, 1724 exp.Max: max_or_greatest, 1725 exp.Min: min_or_least, 1726 exp.ParseJSON: lambda self, e: self.func( 1727 f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this 1728 ), 1729 exp.ToBinary: lambda self, e: self.func( 1730 f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format") 1731 ), 1732 exp.ToBoolean: lambda self, e: self.func( 1733 f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this 1734 ), 1735 exp.ToDouble: lambda self, e: self.func( 1736 f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format") 1737 ), 1738 exp.ToFile: lambda self, e: self.func( 1739 f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path") 1740 ), 1741 exp.ToNumber: lambda self, e: self.func( 1742 f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER", 1743 e.this, 1744 e.args.get("format"), 1745 e.args.get("precision"), 1746 e.args.get("scale"), 1747 ), 1748 exp.JSONFormat: rename_func("TO_JSON"), 1749 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1750 exp.PercentileCont: transforms.preprocess( 1751 [transforms.add_within_group_for_percentiles] 1752 ), 1753 exp.PercentileDisc: transforms.preprocess( 1754 [transforms.add_within_group_for_percentiles] 1755 ), 1756 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1757 exp.RegexpExtract: _regexpextract_sql, 1758 exp.RegexpExtractAll: _regexpextract_sql, 1759 exp.RegexpILike: _regexpilike_sql, 1760 exp.Rand: rename_func("RANDOM"), 1761 exp.Select: transforms.preprocess( 1762 [ 1763 transforms.eliminate_window_clause, 1764 transforms.eliminate_distinct_on, 1765 transforms.explode_projection_to_unnest(), 1766 transforms.eliminate_semi_and_anti_joins, 1767 _transform_generate_date_array, 1768 _qualify_unnested_columns, 1769 _eliminate_dot_variant_lookup, 1770 ] 1771 ), 1772 exp.SHA: rename_func("SHA1"), 1773 exp.SHA1Digest: rename_func("SHA1_BINARY"), 1774 exp.MD5Digest: rename_func("MD5_BINARY"), 1775 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1776 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1777 exp.LowerHex: 
rename_func("TO_CHAR"), 1778 exp.SortArray: rename_func("ARRAY_SORT"), 1779 exp.Skewness: rename_func("SKEW"), 1780 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1781 exp.StartsWith: rename_func("STARTSWITH"), 1782 exp.EndsWith: rename_func("ENDSWITH"), 1783 exp.StrPosition: lambda self, e: strposition_sql( 1784 self, e, func_name="CHARINDEX", supports_position=True 1785 ), 1786 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1787 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1788 exp.Stuff: rename_func("INSERT"), 1789 exp.StPoint: rename_func("ST_MAKEPOINT"), 1790 exp.TimeAdd: date_delta_sql("TIMEADD"), 1791 exp.TimeSlice: lambda self, e: self.func( 1792 "TIME_SLICE", 1793 e.this, 1794 e.expression, 1795 unit_to_str(e), 1796 e.args.get("kind"), 1797 ), 1798 exp.Timestamp: no_timestamp_sql, 1799 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1800 exp.TimestampDiff: lambda self, e: self.func( 1801 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1802 ), 1803 exp.TimestampTrunc: timestamptrunc_sql(), 1804 exp.TimeStrToTime: timestrtotime_sql, 1805 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1806 exp.ToArray: rename_func("TO_ARRAY"), 1807 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1808 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1809 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1810 exp.TsOrDsToDate: lambda self, e: self.func( 1811 f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e) 1812 ), 1813 exp.TsOrDsToTime: lambda self, e: self.func( 1814 f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e) 1815 ), 1816 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1817 exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")), 1818 exp.Uuid: rename_func("UUID_STRING"), 1819 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1820 exp.Booland: rename_func("BOOLAND"), 1821 exp.Boolor: rename_func("BOOLOR"), 1822 exp.WeekOfYear: rename_func("WEEKISO"), 1823 exp.YearOfWeek: rename_func("YEAROFWEEK"), 1824 exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"), 1825 exp.Xor: rename_func("BOOLXOR"), 1826 exp.ByteLength: rename_func("OCTET_LENGTH"), 1827 exp.Flatten: rename_func("ARRAY_FLATTEN"), 1828 exp.ArrayConcatAgg: lambda self, e: self.func( 1829 "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this) 1830 ), 1831 exp.SHA2Digest: lambda self, e: self.func( 1832 "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256) 1833 ), 1834 } 1835 1836 def nthvalue_sql(self, expression: exp.NthValue) -> str: 1837 result = self.func("NTH_VALUE", expression.this, expression.args.get("offset")) 1838 1839 from_first = expression.args.get("from_first") 1840 1841 if from_first is not None: 1842 if from_first: 1843 result = result + " FROM FIRST" 1844 else: 1845 result = result + " FROM LAST" 1846 1847 return result 1848 1849 SUPPORTED_JSON_PATH_PARTS = { 1850 exp.JSONPathKey, 1851 exp.JSONPathRoot, 1852 exp.JSONPathSubscript, 1853 } 1854 1855 TYPE_MAPPING = { 1856 **generator.Generator.TYPE_MAPPING, 1857 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1858 exp.DataType.Type.NESTED: "OBJECT", 1859 exp.DataType.Type.STRUCT: "OBJECT", 1860 exp.DataType.Type.TEXT: "VARCHAR", 1861 } 1862 1863 TOKEN_MAPPING = { 1864 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1865 } 1866 1867 PROPERTIES_LOCATION = { 1868 **generator.Generator.PROPERTIES_LOCATION, 1869 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1870 exp.LocationProperty: 
            exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to its synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive.
For example: 1918 # 1919 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1920 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1921 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1922 1923 return super().datatype_sql(expression) 1924 1925 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1926 return self.func( 1927 "TO_NUMBER", 1928 expression.this, 1929 expression.args.get("format"), 1930 expression.args.get("precision"), 1931 expression.args.get("scale"), 1932 ) 1933 1934 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1935 milli = expression.args.get("milli") 1936 if milli is not None: 1937 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1938 expression.set("nano", milli_to_nano) 1939 1940 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1941 1942 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1943 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1944 return self.func("TO_GEOGRAPHY", expression.this) 1945 if expression.is_type(exp.DataType.Type.GEOMETRY): 1946 return self.func("TO_GEOMETRY", expression.this) 1947 1948 return super().cast_sql(expression, safe_prefix=safe_prefix) 1949 1950 def trycast_sql(self, expression: exp.TryCast) -> str: 1951 value = expression.this 1952 1953 if value.type is None: 1954 from sqlglot.optimizer.annotate_types import annotate_types 1955 1956 value = annotate_types(value, dialect=self.dialect) 1957 1958 # Snowflake requires that TRY_CAST's value be a string 1959 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1960 # if we can deduce that the value is a string, then we can generate TRY_CAST 1961 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1962 return super().trycast_sql(expression) 1963 1964 return self.cast_sql(expression) 1965 1966 def log_sql(self, expression: exp.Log) -> str: 1967 if not expression.expression: 1968 return self.func("LN", expression.this) 1969 1970 return super().log_sql(expression) 1971 1972 def greatest_sql(self, expression: exp.Greatest) -> str: 1973 name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST" 1974 return self.func(name, expression.this, *expression.expressions) 1975 1976 def least_sql(self, expression: exp.Least) -> str: 1977 name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST" 1978 return self.func(name, expression.this, *expression.expressions) 1979 1980 def generator_sql(self, expression: exp.Generator) -> str: 1981 args = [] 1982 rowcount = expression.args.get("rowcount") 1983 time_limit = expression.args.get("time_limit") 1984 1985 if rowcount: 1986 args.append(exp.Kwarg(this=exp.var("ROWCOUNT"), expression=rowcount)) 1987 if time_limit: 1988 args.append(exp.Kwarg(this=exp.var("TIMELIMIT"), expression=time_limit)) 1989 1990 return self.func("GENERATOR", *args) 1991 1992 def unnest_sql(self, expression: exp.Unnest) -> str: 1993 unnest_alias = expression.args.get("alias") 1994 offset = expression.args.get("offset") 1995 1996 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1997 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1998 1999 columns = [ 2000 exp.to_identifier("seq"), 2001 exp.to_identifier("key"), 2002 exp.to_identifier("path"), 2003 offset.pop() if isinstance(offset, exp.Expression) else 
exp.to_identifier("index"), 2004 value, 2005 exp.to_identifier("this"), 2006 ] 2007 2008 if unnest_alias: 2009 unnest_alias.set("columns", columns) 2010 else: 2011 unnest_alias = exp.TableAlias(this="_u", columns=columns) 2012 2013 table_input = self.sql(expression.expressions[0]) 2014 if not table_input.startswith("INPUT =>"): 2015 table_input = f"INPUT => {table_input}" 2016 2017 expression_parent = expression.parent 2018 2019 explode = ( 2020 f"FLATTEN({table_input})" 2021 if isinstance(expression_parent, exp.Lateral) 2022 else f"TABLE(FLATTEN({table_input}))" 2023 ) 2024 alias = self.sql(unnest_alias) 2025 alias = f" AS {alias}" if alias else "" 2026 value = ( 2027 "" 2028 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 2029 else f"{value} FROM " 2030 ) 2031 2032 return f"{value}{explode}{alias}" 2033 2034 def show_sql(self, expression: exp.Show) -> str: 2035 terse = "TERSE " if expression.args.get("terse") else "" 2036 history = " HISTORY" if expression.args.get("history") else "" 2037 like = self.sql(expression, "like") 2038 like = f" LIKE {like}" if like else "" 2039 2040 scope = self.sql(expression, "scope") 2041 scope = f" {scope}" if scope else "" 2042 2043 scope_kind = self.sql(expression, "scope_kind") 2044 if scope_kind: 2045 scope_kind = f" IN {scope_kind}" 2046 2047 starts_with = self.sql(expression, "starts_with") 2048 if starts_with: 2049 starts_with = f" STARTS WITH {starts_with}" 2050 2051 limit = self.sql(expression, "limit") 2052 2053 from_ = self.sql(expression, "from_") 2054 if from_: 2055 from_ = f" FROM {from_}" 2056 2057 privileges = self.expressions(expression, key="privileges", flat=True) 2058 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 2059 2060 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 2061 2062 def describe_sql(self, expression: exp.Describe) -> str: 2063 # Default to table if kind is unknown 2064 kind_value = expression.args.get("kind") or "TABLE" 2065 kind = f" {kind_value}" if kind_value else "" 2066 this = f" {self.sql(expression, 'this')}" 2067 expressions = self.expressions(expression, flat=True) 2068 expressions = f" {expressions}" if expressions else "" 2069 return f"DESCRIBE{kind}{this}{expressions}" 2070 2071 def generatedasidentitycolumnconstraint_sql( 2072 self, expression: exp.GeneratedAsIdentityColumnConstraint 2073 ) -> str: 2074 start = expression.args.get("start") 2075 start = f" START {start}" if start else "" 2076 increment = expression.args.get("increment") 2077 increment = f" INCREMENT {increment}" if increment else "" 2078 2079 order = expression.args.get("order") 2080 if order is not None: 2081 order_clause = " ORDER" if order else " NOORDER" 2082 else: 2083 order_clause = "" 2084 2085 return f"AUTOINCREMENT{start}{increment}{order_clause}" 2086 2087 def cluster_sql(self, expression: exp.Cluster) -> str: 2088 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 2089 2090 def struct_sql(self, expression: exp.Struct) -> str: 2091 if len(expression.expressions) == 1: 2092 arg = expression.expressions[0] 2093 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 2094 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 2095 return f"{{{self.sql(expression.expressions[0])}}}" 2096 2097 keys = [] 2098 values = [] 2099 2100 for i, e in enumerate(expression.expressions): 2101 if isinstance(e, exp.PropertyEQ): 2102 keys.append( 2103 exp.Literal.string(e.name) if 
isinstance(e.this, exp.Identifier) else e.this 2104 ) 2105 values.append(e.expression) 2106 else: 2107 keys.append(exp.Literal.string(f"_{i}")) 2108 values.append(e) 2109 2110 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 2111 2112 @unsupported_args("weight", "accuracy") 2113 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 2114 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 2115 2116 def alterset_sql(self, expression: exp.AlterSet) -> str: 2117 exprs = self.expressions(expression, flat=True) 2118 exprs = f" {exprs}" if exprs else "" 2119 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 2120 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 2121 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 2122 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 2123 tag = self.expressions(expression, key="tag", flat=True) 2124 tag = f" TAG {tag}" if tag else "" 2125 2126 return f"SET{exprs}{file_format}{copy_options}{tag}" 2127 2128 def strtotime_sql(self, expression: exp.StrToTime): 2129 # target_type is stored as a DataType instance 2130 target_type = expression.args.get("target_type") 2131 2132 # Get the type enum from DataType instance or from type annotation 2133 if isinstance(target_type, exp.DataType): 2134 type_enum = target_type.this 2135 elif expression.type: 2136 type_enum = expression.type.this 2137 else: 2138 type_enum = exp.DataType.Type.TIMESTAMP 2139 2140 func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP") 2141 2142 return self.func( 2143 f"{'TRY_' if expression.args.get('safe') else ''}{func_name}", 2144 expression.this, 2145 self.format_time(expression), 2146 ) 2147 2148 def timestampsub_sql(self, expression: exp.TimestampSub): 2149 return self.sql( 2150 exp.TimestampAdd( 2151 this=expression.this, 2152 expression=expression.expression * -1, 2153 unit=expression.unit, 2154 ) 2155 ) 2156 2157 def jsonextract_sql(self, expression: exp.JSONExtract): 2158 this = expression.this 2159 2160 # JSON strings are valid coming from other dialects such as BQ so 2161 # for these cases we PARSE_JSON preemptively 2162 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 2163 "requires_json" 2164 ): 2165 this = exp.ParseJSON(this=this) 2166 2167 return self.func( 2168 "GET_PATH", 2169 this, 2170 expression.expression, 2171 ) 2172 2173 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 2174 this = expression.this 2175 if this.is_string: 2176 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 2177 2178 return self.func("TO_CHAR", this, self.format_time(expression)) 2179 2180 def datesub_sql(self, expression: exp.DateSub) -> str: 2181 value = expression.expression 2182 if value: 2183 value.replace(value * (-1)) 2184 else: 2185 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 2186 2187 return date_delta_sql("DATEADD")(self, expression) 2188 2189 def select_sql(self, expression: exp.Select) -> str: 2190 limit = expression.args.get("limit") 2191 offset = expression.args.get("offset") 2192 if offset and not limit: 2193 expression.limit(exp.Null(), copy=False) 2194 return super().select_sql(expression) 2195 2196 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 2197 is_materialized = expression.find(exp.MaterializedProperty) 2198 copy_grants_property = expression.find(exp.CopyGrantsProperty) 
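            # Illustrative sketch, not part of the source: the special case below targets output of
            # roughly
            #   CREATE MATERIALIZED VIEW v COPY GRANTS (c1, c2) AS SELECT ...
            # i.e. COPY GRANTS emitted between the view name and the column list, whereas plain views
            # keep COPY GRANTS in its default POST_SCHEMA slot, after the column list.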
2199 2200 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 2201 # For materialized views, COPY GRANTS is located *before* the columns list 2202 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 2203 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 2204 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 2205 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 2206 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 2207 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 2208 2209 this_name = self.sql(expression.this, "this") 2210 copy_grants = self.sql(copy_grants_property) 2211 this_schema = self.schema_columns_sql(expression.this) 2212 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 2213 2214 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 2215 2216 return super().createable_sql(expression, locations) 2217 2218 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 2219 this = expression.this 2220 2221 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 2222 # and add it later as part of the WITHIN GROUP clause 2223 order = this if isinstance(this, exp.Order) else None 2224 if order: 2225 expression.set("this", order.this.pop()) 2226 2227 expr_sql = super().arrayagg_sql(expression) 2228 2229 if order: 2230 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 2231 2232 return expr_sql 2233 2234 def array_sql(self, expression: exp.Array) -> str: 2235 expressions = expression.expressions 2236 2237 first_expr = seq_get(expressions, 0) 2238 if isinstance(first_expr, exp.Select): 2239 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2240 if first_expr.text("kind").upper() == "STRUCT": 2241 object_construct_args = [] 2242 for expr in first_expr.expressions: 2243 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2244 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2245 name = expr.this if isinstance(expr, exp.Alias) else expr 2246 2247 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2248 2249 array_agg = exp.ArrayAgg( 2250 this=_build_object_construct(args=object_construct_args) 2251 ) 2252 2253 first_expr.set("kind", None) 2254 first_expr.set("expressions", [array_agg]) 2255 2256 return self.sql(first_expr.subquery()) 2257 2258 return inline_array_sql(self, expression) 2259 2260 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2261 zone = self.sql(expression, "this") 2262 if not zone: 2263 return super().currentdate_sql(expression) 2264 2265 expr = exp.Cast( 2266 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2267 to=exp.DataType(this=exp.DataType.Type.DATE), 2268 ) 2269 return self.sql(expr) 2270 2271 def dot_sql(self, expression: exp.Dot) -> str: 2272 this = expression.this 2273 2274 if not this.type: 2275 from sqlglot.optimizer.annotate_types import annotate_types 2276 2277 this = annotate_types(this, dialect=self.dialect) 2278 2279 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2280 # Generate colon notation for the top level STRUCT 2281 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2282 2283 return super().dot_sql(expression) 2284 2285 def modelattribute_sql(self, 
expression: exp.ModelAttribute) -> str: 2286 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2287 2288 def format_sql(self, expression: exp.Format) -> str: 2289 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2290 return self.func("TO_CHAR", expression.expressions[0]) 2291 2292 return self.function_fallback_sql(expression) 2293 2294 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2295 # Set part_index to 1 if missing 2296 if not expression.args.get("delimiter"): 2297 expression.set("delimiter", exp.Literal.string(" ")) 2298 2299 if not expression.args.get("part_index"): 2300 expression.set("part_index", exp.Literal.number(1)) 2301 2302 return rename_func("SPLIT_PART")(self, expression) 2303 2304 def uniform_sql(self, expression: exp.Uniform) -> str: 2305 gen = expression.args.get("gen") 2306 seed = expression.args.get("seed") 2307 2308 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2309 if seed: 2310 gen = exp.Rand(this=seed) 2311 2312 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2313 if not gen: 2314 gen = exp.Rand() 2315 2316 return self.func("UNIFORM", expression.this, expression.expression, gen)
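A minimal usage sketch (not part of the module source above; table and column names are placeholders and the exact output strings are indicative): the dialect defined in this module is selected by name through sqlglot's public API.

import sqlglot

# Snowflake-only syntax is normalized on the read side, e.g. MINUS is tokenized as EXCEPT
sqlglot.transpile("SELECT a FROM t MINUS SELECT a FROM u", read="snowflake")[0]
# expected to be roughly: 'SELECT a FROM t EXCEPT SELECT a FROM u'

# and Snowflake-specific SQL is produced on the write side, e.g. STAR_EXCEPT = "EXCLUDE"
sqlglot.transpile("SELECT * EXCEPT (secret_col) FROM t", read="bigquery", write="snowflake")[0]
# expected to be roughly: 'SELECT * EXCLUDE (secret_col) FROM t'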
683class Snowflake(Dialect): 684 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 685 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 686 NULL_ORDERING = "nulls_are_large" 687 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 688 SUPPORTS_USER_DEFINED_TYPES = False 689 SUPPORTS_SEMI_ANTI_JOIN = False 690 PREFER_CTE_ALIAS_COLUMN = True 691 TABLESAMPLE_SIZE_IS_PERCENT = True 692 COPY_PARAMS_ARE_CSV = False 693 ARRAY_AGG_INCLUDES_NULLS = None 694 ARRAY_FUNCS_PROPAGATES_NULLS = True 695 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 696 TRY_CAST_REQUIRES_STRING = True 697 SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True 698 LEAST_GREATEST_IGNORES_NULLS = False 699 700 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 701 702 # https://docs.snowflake.com/en/en/sql-reference/functions/initcap 703 INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>' 704 705 INVERSE_TIME_MAPPING = { 706 "T": "T", # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O' 707 } 708 709 TIME_MAPPING = { 710 "YYYY": "%Y", 711 "yyyy": "%Y", 712 "YY": "%y", 713 "yy": "%y", 714 "MMMM": "%B", 715 "mmmm": "%B", 716 "MON": "%b", 717 "mon": "%b", 718 "MM": "%m", 719 "mm": "%m", 720 "DD": "%d", 721 "dd": "%-d", 722 "DY": "%a", 723 "dy": "%w", 724 "HH24": "%H", 725 "hh24": "%H", 726 "HH12": "%I", 727 "hh12": "%I", 728 "MI": "%M", 729 "mi": "%M", 730 "SS": "%S", 731 "ss": "%S", 732 "FF": "%f_nine", # %f_ internal representation with precision specified 733 "ff": "%f_nine", 734 "FF0": "%f_zero", 735 "ff0": "%f_zero", 736 "FF1": "%f_one", 737 "ff1": "%f_one", 738 "FF2": "%f_two", 739 "ff2": "%f_two", 740 "FF3": "%f_three", 741 "ff3": "%f_three", 742 "FF4": "%f_four", 743 "ff4": "%f_four", 744 "FF5": "%f_five", 745 "ff5": "%f_five", 746 "FF6": "%f", 747 "ff6": "%f", 748 "FF7": "%f_seven", 749 "ff7": "%f_seven", 750 "FF8": "%f_eight", 751 "ff8": "%f_eight", 752 "FF9": "%f_nine", 753 "ff9": "%f_nine", 754 "TZHTZM": "%z", 755 "tzhtzm": "%z", 756 "TZH:TZM": "%:z", # internal representation for ±HH:MM 757 "tzh:tzm": "%:z", 758 "TZH": "%-z", # internal representation ±HH 759 "tzh": "%-z", 760 '"T"': "T", # remove the optional double quotes around the separator between the date and time 761 # Seems like Snowflake treats AM/PM in the format string as equivalent, 762 # only the time (stamp) value's AM/PM affects the output 763 "AM": "%p", 764 "am": "%p", 765 "PM": "%p", 766 "pm": "%p", 767 } 768 769 DATE_PART_MAPPING = { 770 **Dialect.DATE_PART_MAPPING, 771 "ISOWEEK": "WEEKISO", 772 # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve 773 # EPOCH_SECOND as a distinct value for two reasons: 774 # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE 775 # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND 776 # to BIGINT to match Snowflake's integer behavior 777 # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized 778 # to EXTRACT(EPOCH FROM ts) and lose the integer semantics. 779 "EPOCH_SECOND": "EPOCH_SECOND", 780 "EPOCH_SECONDS": "EPOCH_SECOND", 781 } 782 783 PSEUDOCOLUMNS = {"LEVEL"} 784 785 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 786 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 787 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 788 return super().can_quote(identifier, identify) and not ( 789 isinstance(identifier.parent, exp.Table) 790 and not identifier.quoted 791 and identifier.name.lower() == "dual" 792 ) 793 794 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 795 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 796 SINGLE_TOKENS.pop("$") 797 798 class Parser(parser.Parser): 799 IDENTIFY_PIVOT_STRINGS = True 800 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 801 COLON_IS_VARIANT_EXTRACT = True 802 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 803 804 ID_VAR_TOKENS = { 805 *parser.Parser.ID_VAR_TOKENS, 806 TokenType.EXCEPT, 807 TokenType.MATCH_CONDITION, 808 } 809 810 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 811 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 812 813 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 814 815 NO_PAREN_FUNCTIONS = { 816 **parser.Parser.NO_PAREN_FUNCTIONS, 817 TokenType.CURRENT_TIME: exp.Localtime, 818 } 819 820 FUNCTIONS = { 821 **parser.Parser.FUNCTIONS, 822 "ADD_MONTHS": lambda args: exp.AddMonths( 823 this=seq_get(args, 0), 824 expression=seq_get(args, 1), 825 preserve_end_of_month=True, 826 ), 827 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 828 "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)), 829 "APPROX_TOP_K": _build_approx_top_k, 830 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 831 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 832 this=seq_get(args, 1), 833 expression=seq_get(args, 0), 834 ensure_variant=False, 835 check_null=True, 836 ), 837 "ARRAY_DISTINCT": lambda args: exp.ArrayDistinct( 838 this=seq_get(args, 0), 839 check_null=True, 840 ), 841 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 842 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 843 start=seq_get(args, 0), 844 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 845 step=seq_get(args, 2), 846 ), 847 "ARRAY_SORT": exp.SortArray.from_arg_list, 848 "ARRAY_FLATTEN": exp.Flatten.from_arg_list, 849 "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 850 "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 851 "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 852 "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 853 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 854 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 855 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 856 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 857 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 858 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 859 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 860 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 861 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 862 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 863 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 864 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 865 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 866 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 867 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 868 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 869 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 870 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 871 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 872 "BIT_XORAGG": 
exp.BitwiseXorAgg.from_arg_list, 873 "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list, 874 "BOOLAND": lambda args: exp.Booland( 875 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 876 ), 877 "BOOLOR": lambda args: exp.Boolor( 878 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 879 ), 880 "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True), 881 "BOOLXOR": lambda args: exp.Xor( 882 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 883 ), 884 "CORR": lambda args: exp.Corr( 885 this=seq_get(args, 0), 886 expression=seq_get(args, 1), 887 null_on_zero_variance=True, 888 ), 889 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 890 "DATEFROMPARTS": _build_date_from_parts, 891 "DATE_FROM_PARTS": _build_date_from_parts, 892 "DATE_TRUNC": _date_trunc_to_time, 893 "DATEADD": _build_date_time_add(exp.DateAdd), 894 "DATEDIFF": _build_datediff, 895 "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True), 896 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 897 "DIV0": _build_if_from_div0, 898 "DIV0NULL": _build_if_from_div0null, 899 "EDITDISTANCE": lambda args: exp.Levenshtein( 900 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 901 ), 902 "FLATTEN": exp.Explode.from_arg_list, 903 "GENERATOR": _build_generator, 904 "GET": exp.GetExtract.from_arg_list, 905 "GETDATE": exp.CurrentTimestamp.from_arg_list, 906 "GET_PATH": lambda args, dialect: exp.JSONExtract( 907 this=seq_get(args, 0), 908 expression=dialect.to_json_path(seq_get(args, 1)), 909 requires_json=True, 910 ), 911 "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest( 912 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 913 ), 914 "LEAST_IGNORE_NULLS": lambda args: exp.Least( 915 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 916 ), 917 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 918 "IFF": exp.If.from_arg_list, 919 "JAROWINKLER_SIMILARITY": lambda args: exp.JarowinklerSimilarity( 920 this=seq_get(args, 0), 921 expression=seq_get(args, 1), 922 case_insensitive=True, 923 ), 924 "MD5_HEX": exp.MD5.from_arg_list, 925 "MD5_BINARY": exp.MD5Digest.from_arg_list, 926 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 927 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 928 "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True), 929 "LAST_DAY": lambda args: exp.LastDay( 930 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 931 ), 932 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 933 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 934 "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 935 "NULLIFZERO": _build_if_from_nullifzero, 936 "OBJECT_CONSTRUCT": _build_object_construct, 937 "OBJECT_KEYS": exp.JSONKeys.from_arg_list, 938 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 939 "PARSE_URL": lambda args: exp.ParseUrl( 940 this=seq_get(args, 0), permissive=seq_get(args, 1) 941 ), 942 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 943 "REGEXP_LIKE": _build_regexp_like, 944 "REGEXP_REPLACE": _build_regexp_replace, 945 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 946 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 947 "REPLACE": build_replace_with_optional_replacement, 948 "RLIKE": _build_regexp_like, 949 "ROUND": _build_round, 950 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 951 "SHA1_HEX": exp.SHA.from_arg_list, 952 "SHA2_BINARY": 
exp.SHA2Digest.from_arg_list, 953 "SHA2_HEX": exp.SHA2.from_arg_list, 954 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 955 "STDDEV_SAMP": exp.Stddev.from_arg_list, 956 "STRTOK": _build_strtok, 957 "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True), 958 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 959 "TIMEADD": _build_date_time_add(exp.TimeAdd), 960 "TIMEDIFF": _build_datediff, 961 "TIME_FROM_PARTS": lambda args: exp.TimeFromParts( 962 hour=seq_get(args, 0), 963 min=seq_get(args, 1), 964 sec=seq_get(args, 2), 965 nano=seq_get(args, 3), 966 overflow=True, 967 ), 968 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 969 "TIMESTAMPDIFF": _build_datediff, 970 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 971 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 972 "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts, 973 "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts, 974 "TRUNC": lambda args, dialect: build_trunc( 975 args, dialect, date_trunc_requires_part=False 976 ), 977 "TRUNCATE": lambda args, dialect: build_trunc( 978 args, dialect, date_trunc_requires_part=False 979 ), 980 "TRY_DECRYPT": lambda args: exp.Decrypt( 981 this=seq_get(args, 0), 982 passphrase=seq_get(args, 1), 983 aad=seq_get(args, 2), 984 encryption_method=seq_get(args, 3), 985 safe=True, 986 ), 987 "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw( 988 this=seq_get(args, 0), 989 key=seq_get(args, 1), 990 iv=seq_get(args, 2), 991 aad=seq_get(args, 3), 992 encryption_method=seq_get(args, 4), 993 aead=seq_get(args, 5), 994 safe=True, 995 ), 996 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 997 "TRY_TO_BINARY": lambda args: exp.ToBinary( 998 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 999 ), 1000 "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True), 1001 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 1002 **dict.fromkeys( 1003 ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number 1004 ), 1005 "TRY_TO_DOUBLE": lambda args: exp.ToDouble( 1006 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 1007 ), 1008 "TRY_TO_FILE": lambda args: exp.ToFile( 1009 this=seq_get(args, 0), path=seq_get(args, 1), safe=True 1010 ), 1011 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 1012 "TRY_TO_TIMESTAMP": _build_datetime( 1013 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 1014 ), 1015 "TRY_TO_TIMESTAMP_LTZ": _build_datetime( 1016 "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True 1017 ), 1018 "TRY_TO_TIMESTAMP_NTZ": _build_datetime( 1019 "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True 1020 ), 1021 "TRY_TO_TIMESTAMP_TZ": _build_datetime( 1022 "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True 1023 ), 1024 "TO_CHAR": build_timetostr_or_tochar, 1025 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 1026 **dict.fromkeys( 1027 ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"), 1028 lambda args: exp.ToNumber( 1029 this=seq_get(args, 0), 1030 format=seq_get(args, 1), 1031 precision=seq_get(args, 2), 1032 scale=seq_get(args, 3), 1033 ), 1034 ), 1035 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 1036 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 1037 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 1038 "TO_TIMESTAMP_NTZ": 
_build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ), 1039 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 1040 "TO_VARCHAR": build_timetostr_or_tochar, 1041 "TO_JSON": exp.JSONFormat.from_arg_list, 1042 "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list, 1043 "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list, 1044 "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list, 1045 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 1046 "ZEROIFNULL": _build_if_from_zeroifnull, 1047 "LIKE": build_like(exp.Like), 1048 "ILIKE": build_like(exp.ILike), 1049 "SEARCH": _build_search, 1050 "SKEW": exp.Skewness.from_arg_list, 1051 "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 1052 "WEEKISO": exp.WeekOfYear.from_arg_list, 1053 "WEEKOFYEAR": exp.Week.from_arg_list, 1054 } 1055 FUNCTIONS.pop("PREDICT") 1056 1057 FUNCTION_PARSERS = { 1058 **parser.Parser.FUNCTION_PARSERS, 1059 "DATE_PART": lambda self: self._parse_date_part(), 1060 "DIRECTORY": lambda self: self._parse_directory(), 1061 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 1062 "LISTAGG": lambda self: self._parse_string_agg(), 1063 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 1064 } 1065 FUNCTION_PARSERS.pop("TRIM") 1066 1067 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 1068 1069 ALTER_PARSERS = { 1070 **parser.Parser.ALTER_PARSERS, 1071 "MODIFY": lambda self: self._parse_alter_table_alter(), 1072 "SESSION": lambda self: self._parse_alter_session(), 1073 "UNSET": lambda self: self.expression( 1074 exp.Set, 1075 tag=self._match_text_seq("TAG"), 1076 expressions=self._parse_csv(self._parse_id_var), 1077 unset=True, 1078 ), 1079 } 1080 1081 STATEMENT_PARSERS = { 1082 **parser.Parser.STATEMENT_PARSERS, 1083 TokenType.GET: lambda self: self._parse_get(), 1084 TokenType.PUT: lambda self: self._parse_put(), 1085 TokenType.SHOW: lambda self: self._parse_show(), 1086 } 1087 1088 PROPERTY_PARSERS = { 1089 **parser.Parser.PROPERTY_PARSERS, 1090 "CREDENTIALS": lambda self: self._parse_credentials_property(), 1091 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 1092 "LOCATION": lambda self: self._parse_location_property(), 1093 "TAG": lambda self: self._parse_tag(), 1094 "USING": lambda self: self._match_text_seq("TEMPLATE") 1095 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 1096 } 1097 1098 TYPE_CONVERTERS = { 1099 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 1100 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 1101 } 1102 1103 SHOW_PARSERS = { 1104 "DATABASES": _show_parser("DATABASES"), 1105 "TERSE DATABASES": _show_parser("DATABASES"), 1106 "SCHEMAS": _show_parser("SCHEMAS"), 1107 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 1108 "OBJECTS": _show_parser("OBJECTS"), 1109 "TERSE OBJECTS": _show_parser("OBJECTS"), 1110 "TABLES": _show_parser("TABLES"), 1111 "TERSE TABLES": _show_parser("TABLES"), 1112 "VIEWS": _show_parser("VIEWS"), 1113 "TERSE VIEWS": _show_parser("VIEWS"), 1114 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1115 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1116 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1117 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1118 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1119 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1120 "SEQUENCES": _show_parser("SEQUENCES"), 1121 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 1122 "STAGES": _show_parser("STAGES"), 1123 
"COLUMNS": _show_parser("COLUMNS"), 1124 "USERS": _show_parser("USERS"), 1125 "TERSE USERS": _show_parser("USERS"), 1126 "FILE FORMATS": _show_parser("FILE FORMATS"), 1127 "FUNCTIONS": _show_parser("FUNCTIONS"), 1128 "PROCEDURES": _show_parser("PROCEDURES"), 1129 "WAREHOUSES": _show_parser("WAREHOUSES"), 1130 } 1131 1132 CONSTRAINT_PARSERS = { 1133 **parser.Parser.CONSTRAINT_PARSERS, 1134 "WITH": lambda self: self._parse_with_constraint(), 1135 "MASKING": lambda self: self._parse_with_constraint(), 1136 "PROJECTION": lambda self: self._parse_with_constraint(), 1137 "TAG": lambda self: self._parse_with_constraint(), 1138 } 1139 1140 STAGED_FILE_SINGLE_TOKENS = { 1141 TokenType.DOT, 1142 TokenType.MOD, 1143 TokenType.SLASH, 1144 } 1145 1146 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 1147 1148 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 1149 1150 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 1151 1152 LAMBDAS = { 1153 **parser.Parser.LAMBDAS, 1154 TokenType.ARROW: lambda self, expressions: self.expression( 1155 exp.Lambda, 1156 this=self._replace_lambda( 1157 self._parse_assignment(), 1158 expressions, 1159 ), 1160 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 1161 ), 1162 } 1163 1164 COLUMN_OPERATORS = { 1165 **parser.Parser.COLUMN_OPERATORS, 1166 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 1167 exp.ModelAttribute, this=this, expression=attr 1168 ), 1169 } 1170 1171 def _parse_directory(self) -> exp.DirectoryStage: 1172 table = self._parse_table_parts() 1173 1174 if isinstance(table, exp.Table): 1175 table = table.this 1176 1177 return self.expression(exp.DirectoryStage, this=table) 1178 1179 def _parse_use(self) -> exp.Use: 1180 if self._match_text_seq("SECONDARY", "ROLES"): 1181 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 1182 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 1183 return self.expression( 1184 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 1185 ) 1186 1187 return super()._parse_use() 1188 1189 def _negate_range( 1190 self, this: t.Optional[exp.Expression] = None 1191 ) -> t.Optional[exp.Expression]: 1192 if not this: 1193 return this 1194 1195 query = this.args.get("query") 1196 if isinstance(this, exp.In) and isinstance(query, exp.Query): 1197 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 1198 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 1199 # which can produce different results (most likely a SnowFlake bug). 
1200 # 1201 # https://docs.snowflake.com/en/sql-reference/functions/in 1202 # Context: https://github.com/tobymao/sqlglot/issues/3890 1203 return self.expression( 1204 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 1205 ) 1206 1207 return self.expression(exp.Not, this=this) 1208 1209 def _parse_tag(self) -> exp.Tags: 1210 return self.expression( 1211 exp.Tags, 1212 expressions=self._parse_wrapped_csv(self._parse_property), 1213 ) 1214 1215 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 1216 if self._prev.token_type != TokenType.WITH: 1217 self._retreat(self._index - 1) 1218 1219 if self._match_text_seq("MASKING", "POLICY"): 1220 policy = self._parse_column() 1221 return self.expression( 1222 exp.MaskingPolicyColumnConstraint, 1223 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1224 expressions=self._match(TokenType.USING) 1225 and self._parse_wrapped_csv(self._parse_id_var), 1226 ) 1227 if self._match_text_seq("PROJECTION", "POLICY"): 1228 policy = self._parse_column() 1229 return self.expression( 1230 exp.ProjectionPolicyColumnConstraint, 1231 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1232 ) 1233 if self._match(TokenType.TAG): 1234 return self._parse_tag() 1235 1236 return None 1237 1238 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1239 if self._match(TokenType.TAG): 1240 return self._parse_tag() 1241 1242 return super()._parse_with_property() 1243 1244 def _parse_create(self) -> exp.Create | exp.Command: 1245 expression = super()._parse_create() 1246 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 1247 # Replace the Table node with the enclosed Identifier 1248 expression.this.replace(expression.this.this) 1249 1250 return expression 1251 1252 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 1253 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 1254 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 1255 this = self._parse_var() or self._parse_type() 1256 1257 if not this: 1258 return None 1259 1260 # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr) 1261 expression = ( 1262 self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise() 1263 ) 1264 return self.expression( 1265 exp.Extract, this=map_date_part(this, self.dialect), expression=expression 1266 ) 1267 1268 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1269 if is_map: 1270 # Keys are strings in Snowflake's objects, see also: 1271 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1272 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1273 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1274 1275 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1276 1277 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1278 lateral = super()._parse_lateral() 1279 if not lateral: 1280 return lateral 1281 1282 if isinstance(lateral.this, exp.Explode): 1283 table_alias = lateral.args.get("alias") 1284 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1285 if table_alias and not table_alias.args.get("columns"): 1286 table_alias.set("columns", columns) 1287 elif not table_alias: 1288 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1289 1290 return lateral 1291 1292 def 
_parse_table_parts( 1293 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1294 ) -> exp.Table: 1295 # https://docs.snowflake.com/en/user-guide/querying-stage 1296 if self._match(TokenType.STRING, advance=False): 1297 table = self._parse_string() 1298 elif self._match_text_seq("@", advance=False): 1299 table = self._parse_location_path() 1300 else: 1301 table = None 1302 1303 if table: 1304 file_format = None 1305 pattern = None 1306 1307 wrapped = self._match(TokenType.L_PAREN) 1308 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1309 if self._match_text_seq("FILE_FORMAT", "=>"): 1310 file_format = self._parse_string() or super()._parse_table_parts( 1311 is_db_reference=is_db_reference 1312 ) 1313 elif self._match_text_seq("PATTERN", "=>"): 1314 pattern = self._parse_string() 1315 else: 1316 break 1317 1318 self._match(TokenType.COMMA) 1319 1320 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1321 else: 1322 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1323 1324 return table 1325 1326 def _parse_table( 1327 self, 1328 schema: bool = False, 1329 joins: bool = False, 1330 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1331 parse_bracket: bool = False, 1332 is_db_reference: bool = False, 1333 parse_partition: bool = False, 1334 consume_pipe: bool = False, 1335 ) -> t.Optional[exp.Expression]: 1336 table = super()._parse_table( 1337 schema=schema, 1338 joins=joins, 1339 alias_tokens=alias_tokens, 1340 parse_bracket=parse_bracket, 1341 is_db_reference=is_db_reference, 1342 parse_partition=parse_partition, 1343 ) 1344 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1345 table_from_rows = table.this 1346 for arg in exp.TableFromRows.arg_types: 1347 if arg != "this": 1348 table_from_rows.set(arg, table.args.get(arg)) 1349 1350 table = table_from_rows 1351 1352 return table 1353 1354 def _parse_id_var( 1355 self, 1356 any_token: bool = True, 1357 tokens: t.Optional[t.Collection[TokenType]] = None, 1358 ) -> t.Optional[exp.Expression]: 1359 if self._match_text_seq("IDENTIFIER", "("): 1360 identifier = ( 1361 super()._parse_id_var(any_token=any_token, tokens=tokens) 1362 or self._parse_string() 1363 ) 1364 self._match_r_paren() 1365 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1366 1367 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1368 1369 def _parse_show_snowflake(self, this: str) -> exp.Show: 1370 scope = None 1371 scope_kind = None 1372 1373 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1374 # which is syntactically valid but has no effect on the output 1375 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1376 1377 history = self._match_text_seq("HISTORY") 1378 1379 like = self._parse_string() if self._match(TokenType.LIKE) else None 1380 1381 if self._match(TokenType.IN): 1382 if self._match_text_seq("ACCOUNT"): 1383 scope_kind = "ACCOUNT" 1384 elif self._match_text_seq("CLASS"): 1385 scope_kind = "CLASS" 1386 scope = self._parse_table_parts() 1387 elif self._match_text_seq("APPLICATION"): 1388 scope_kind = "APPLICATION" 1389 if self._match_text_seq("PACKAGE"): 1390 scope_kind += " PACKAGE" 1391 scope = self._parse_table_parts() 1392 elif self._match_set(self.DB_CREATABLES): 1393 scope_kind = self._prev.text.upper() 1394 if self._curr: 1395 scope = self._parse_table_parts() 1396 elif self._curr: 1397 scope_kind = "SCHEMA" if this in 
self.SCHEMA_KINDS else "TABLE" 1398 scope = self._parse_table_parts() 1399 1400 return self.expression( 1401 exp.Show, 1402 terse=terse, 1403 this=this, 1404 history=history, 1405 like=like, 1406 scope=scope, 1407 scope_kind=scope_kind, 1408 starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1409 limit=self._parse_limit(), 1410 from_=self._parse_string() if self._match(TokenType.FROM) else None, 1411 privileges=self._match_text_seq("WITH", "PRIVILEGES") 1412 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1413 ) 1414 1415 def _parse_put(self) -> exp.Put | exp.Command: 1416 if self._curr.token_type != TokenType.STRING: 1417 return self._parse_as_command(self._prev) 1418 1419 return self.expression( 1420 exp.Put, 1421 this=self._parse_string(), 1422 target=self._parse_location_path(), 1423 properties=self._parse_properties(), 1424 ) 1425 1426 def _parse_get(self) -> t.Optional[exp.Expression]: 1427 start = self._prev 1428 1429 # If we detect GET( then we need to parse a function, not a statement 1430 if self._match(TokenType.L_PAREN): 1431 self._retreat(self._index - 2) 1432 return self._parse_expression() 1433 1434 target = self._parse_location_path() 1435 1436 # Parse as command if unquoted file path 1437 if self._curr.token_type == TokenType.URI_START: 1438 return self._parse_as_command(start) 1439 1440 return self.expression( 1441 exp.Get, 1442 this=self._parse_string(), 1443 target=target, 1444 properties=self._parse_properties(), 1445 ) 1446 1447 def _parse_location_property(self) -> exp.LocationProperty: 1448 self._match(TokenType.EQ) 1449 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1450 1451 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1452 # Parse either a subquery or a staged file 1453 return ( 1454 self._parse_select(table=True, parse_subquery_alias=False) 1455 if self._match(TokenType.L_PAREN, advance=False) 1456 else self._parse_table_parts() 1457 ) 1458 1459 def _parse_location_path(self) -> exp.Var: 1460 start = self._curr 1461 self._advance_any(ignore_reserved=True) 1462 1463 # We avoid consuming a comma token because external tables like @foo and @bar 1464 # can be joined in a query with a comma separator, as well as closing paren 1465 # in case of subqueries 1466 while self._is_connected() and not self._match_set( 1467 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1468 ): 1469 self._advance_any(ignore_reserved=True) 1470 1471 return exp.var(self._find_sql(start, self._prev)) 1472 1473 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1474 this = super()._parse_lambda_arg() 1475 1476 if not this: 1477 return this 1478 1479 typ = self._parse_types() 1480 1481 if typ: 1482 return self.expression(exp.Cast, this=this, to=typ) 1483 1484 return this 1485 1486 def _parse_foreign_key(self) -> exp.ForeignKey: 1487 # inlineFK, the REFERENCES columns are implied 1488 if self._match(TokenType.REFERENCES, advance=False): 1489 return self.expression(exp.ForeignKey) 1490 1491 # outoflineFK, explicitly names the columns 1492 return super()._parse_foreign_key() 1493 1494 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1495 self._match(TokenType.EQ) 1496 if self._match(TokenType.L_PAREN, advance=False): 1497 expressions = self._parse_wrapped_options() 1498 else: 1499 expressions = [self._parse_format_name()] 1500 1501 return self.expression( 1502 exp.FileFormatProperty, 1503 expressions=expressions, 1504 ) 1505 1506 def 
_parse_credentials_property(self) -> exp.CredentialsProperty: 1507 return self.expression( 1508 exp.CredentialsProperty, 1509 expressions=self._parse_wrapped_options(), 1510 ) 1511 1512 def _parse_semantic_view(self) -> exp.SemanticView: 1513 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1514 1515 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1516 if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")): 1517 keyword = self._prev.text.lower() 1518 kwargs[keyword] = self._parse_csv( 1519 lambda: self._parse_alias(self._parse_disjunction(), explicit=True) 1520 ) 1521 elif self._match_text_seq("WHERE"): 1522 kwargs["where"] = self._parse_expression() 1523 else: 1524 self.raise_error("Expecting ) or encountered unexpected keyword") 1525 break 1526 1527 return self.expression(exp.SemanticView, **kwargs) 1528 1529 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 1530 set = super()._parse_set(unset=unset, tag=tag) 1531 1532 if isinstance(set, exp.Set): 1533 for expr in set.expressions: 1534 if isinstance(expr, exp.SetItem): 1535 expr.set("kind", "VARIABLE") 1536 return set 1537 1538 def _parse_window( 1539 self, this: t.Optional[exp.Expression], alias: bool = False 1540 ) -> t.Optional[exp.Expression]: 1541 if isinstance(this, exp.NthValue): 1542 if self._match_text_seq("FROM"): 1543 if self._match_texts(("FIRST", "LAST")): 1544 from_first = self._prev.text.upper() == "FIRST" 1545 this.set("from_first", from_first) 1546 1547 result = super()._parse_window(this, alias) 1548 1549 return result 1550 1551 class Tokenizer(tokens.Tokenizer): 1552 STRING_ESCAPES = ["\\", "'"] 1553 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1554 RAW_STRINGS = ["$$"] 1555 COMMENTS = ["--", "//", ("/*", "*/")] 1556 NESTED_COMMENTS = False 1557 1558 KEYWORDS = { 1559 **tokens.Tokenizer.KEYWORDS, 1560 "BYTEINT": TokenType.INT, 1561 "FILE://": TokenType.URI_START, 1562 "FILE FORMAT": TokenType.FILE_FORMAT, 1563 "GET": TokenType.GET, 1564 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1565 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1566 "MINUS": TokenType.EXCEPT, 1567 "NCHAR VARYING": TokenType.VARCHAR, 1568 "PUT": TokenType.PUT, 1569 "REMOVE": TokenType.COMMAND, 1570 "RM": TokenType.COMMAND, 1571 "SAMPLE": TokenType.TABLE_SAMPLE, 1572 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1573 "SQL_DOUBLE": TokenType.DOUBLE, 1574 "SQL_VARCHAR": TokenType.VARCHAR, 1575 "STAGE": TokenType.STAGE, 1576 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1577 "STREAMLIT": TokenType.STREAMLIT, 1578 "TAG": TokenType.TAG, 1579 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1580 "TOP": TokenType.TOP, 1581 "WAREHOUSE": TokenType.WAREHOUSE, 1582 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float 1583 # FLOAT is a synonym for DOUBLE in Snowflake 1584 "FLOAT": TokenType.DOUBLE, 1585 } 1586 KEYWORDS.pop("/*+") 1587 1588 SINGLE_TOKENS = { 1589 **tokens.Tokenizer.SINGLE_TOKENS, 1590 "$": TokenType.PARAMETER, 1591 "!": TokenType.EXCLAMATION, 1592 } 1593 1594 VAR_SINGLE_TOKENS = {"$"} 1595 1596 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1597 1598 class Generator(generator.Generator): 1599 PARAMETER_TOKEN = "$" 1600 MATCHED_BY_SOURCE = False 1601 SINGLE_STRING_INTERVAL = True 1602 JOIN_HINTS = False 1603 TABLE_HINTS = False 1604 QUERY_HINTS = False 1605 AGGREGATE_FILTER_SUPPORTED = False 1606 SUPPORTS_TABLE_COPY = False 1607 COLLATE_IS_FUNC = True 1608 LIMIT_ONLY_LITERALS = True 1609 JSON_KEY_VALUE_PAIR_SEP = "," 1610 INSERT_OVERWRITE = " OVERWRITE 
INTO" 1611 STRUCT_DELIMITER = ("(", ")") 1612 COPY_PARAMS_ARE_WRAPPED = False 1613 COPY_PARAMS_EQ_REQUIRED = True 1614 STAR_EXCEPT = "EXCLUDE" 1615 SUPPORTS_EXPLODING_PROJECTIONS = False 1616 ARRAY_CONCAT_IS_VAR_LEN = False 1617 SUPPORTS_CONVERT_TIMEZONE = True 1618 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1619 SUPPORTS_MEDIAN = True 1620 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1621 SUPPORTS_DECODE_CASE = True 1622 IS_BOOL_ALLOWED = False 1623 DIRECTED_JOINS = True 1624 1625 TRANSFORMS = { 1626 **generator.Generator.TRANSFORMS, 1627 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1628 exp.ArgMax: rename_func("MAX_BY"), 1629 exp.ArgMin: rename_func("MIN_BY"), 1630 exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]), 1631 exp.ArrayConcat: array_concat_sql("ARRAY_CAT"), 1632 exp.ArrayAppend: array_append_sql("ARRAY_APPEND"), 1633 exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"), 1634 exp.ArrayContains: lambda self, e: self.func( 1635 "ARRAY_CONTAINS", 1636 e.expression 1637 if e.args.get("ensure_variant") is False 1638 else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False), 1639 e.this, 1640 ), 1641 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1642 exp.AtTimeZone: lambda self, e: self.func( 1643 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1644 ), 1645 exp.BitwiseOr: rename_func("BITOR"), 1646 exp.BitwiseXor: rename_func("BITXOR"), 1647 exp.BitwiseAnd: rename_func("BITAND"), 1648 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1649 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1650 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1651 exp.BitwiseNot: rename_func("BITNOT"), 1652 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1653 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1654 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1655 exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE") 1656 if e.args.get("sysdate") 1657 else self.function_fallback_sql(e), 1658 exp.CurrentSchemas: lambda self, e: self.func("CURRENT_SCHEMAS"), 1659 exp.Localtime: lambda self, e: self.func("CURRENT_TIME", e.this) 1660 if e.this 1661 else "CURRENT_TIME", 1662 exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this) 1663 if e.this 1664 else "CURRENT_TIMESTAMP", 1665 exp.DateAdd: date_delta_sql("DATEADD"), 1666 exp.DateDiff: date_delta_sql("DATEDIFF"), 1667 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1668 exp.DatetimeDiff: timestampdiff_sql, 1669 exp.DateStrToDate: datestrtodate_sql, 1670 exp.Decrypt: lambda self, e: self.func( 1671 f"{'TRY_' if e.args.get('safe') else ''}DECRYPT", 1672 e.this, 1673 e.args.get("passphrase"), 1674 e.args.get("aad"), 1675 e.args.get("encryption_method"), 1676 ), 1677 exp.DecryptRaw: lambda self, e: self.func( 1678 f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW", 1679 e.this, 1680 e.args.get("key"), 1681 e.args.get("iv"), 1682 e.args.get("aad"), 1683 e.args.get("encryption_method"), 1684 e.args.get("aead"), 1685 ), 1686 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1687 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1688 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1689 exp.DayOfYear: rename_func("DAYOFYEAR"), 1690 exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"), 1691 exp.Explode: rename_func("FLATTEN"), 1692 exp.Extract: lambda self, e: self.func( 1693 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1694 ), 1695 exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"), 1696 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1697 
exp.FileFormatProperty: lambda self, 1698 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1699 exp.FromTimeZone: lambda self, e: self.func( 1700 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1701 ), 1702 exp.GenerateSeries: lambda self, e: self.func( 1703 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1704 ), 1705 exp.GetExtract: rename_func("GET"), 1706 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1707 exp.If: if_sql(name="IFF", false_value="NULL"), 1708 exp.JSONExtractArray: _json_extract_value_array_sql, 1709 exp.JSONExtractScalar: lambda self, e: self.func( 1710 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1711 ), 1712 exp.JSONKeys: rename_func("OBJECT_KEYS"), 1713 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1714 exp.JSONPathRoot: lambda *_: "", 1715 exp.JSONValueArray: _json_extract_value_array_sql, 1716 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1717 rename_func("EDITDISTANCE") 1718 ), 1719 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1720 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1721 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1722 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1723 exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"), 1724 exp.MakeInterval: no_make_interval_sql, 1725 exp.Max: max_or_greatest, 1726 exp.Min: min_or_least, 1727 exp.ParseJSON: lambda self, e: self.func( 1728 f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this 1729 ), 1730 exp.ToBinary: lambda self, e: self.func( 1731 f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format") 1732 ), 1733 exp.ToBoolean: lambda self, e: self.func( 1734 f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this 1735 ), 1736 exp.ToDouble: lambda self, e: self.func( 1737 f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format") 1738 ), 1739 exp.ToFile: lambda self, e: self.func( 1740 f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path") 1741 ), 1742 exp.ToNumber: lambda self, e: self.func( 1743 f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER", 1744 e.this, 1745 e.args.get("format"), 1746 e.args.get("precision"), 1747 e.args.get("scale"), 1748 ), 1749 exp.JSONFormat: rename_func("TO_JSON"), 1750 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1751 exp.PercentileCont: transforms.preprocess( 1752 [transforms.add_within_group_for_percentiles] 1753 ), 1754 exp.PercentileDisc: transforms.preprocess( 1755 [transforms.add_within_group_for_percentiles] 1756 ), 1757 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1758 exp.RegexpExtract: _regexpextract_sql, 1759 exp.RegexpExtractAll: _regexpextract_sql, 1760 exp.RegexpILike: _regexpilike_sql, 1761 exp.Rand: rename_func("RANDOM"), 1762 exp.Select: transforms.preprocess( 1763 [ 1764 transforms.eliminate_window_clause, 1765 transforms.eliminate_distinct_on, 1766 transforms.explode_projection_to_unnest(), 1767 transforms.eliminate_semi_and_anti_joins, 1768 _transform_generate_date_array, 1769 _qualify_unnested_columns, 1770 _eliminate_dot_variant_lookup, 1771 ] 1772 ), 1773 exp.SHA: rename_func("SHA1"), 1774 exp.SHA1Digest: rename_func("SHA1_BINARY"), 1775 exp.MD5Digest: rename_func("MD5_BINARY"), 1776 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1777 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1778 exp.LowerHex: 
rename_func("TO_CHAR"), 1779 exp.SortArray: rename_func("ARRAY_SORT"), 1780 exp.Skewness: rename_func("SKEW"), 1781 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1782 exp.StartsWith: rename_func("STARTSWITH"), 1783 exp.EndsWith: rename_func("ENDSWITH"), 1784 exp.StrPosition: lambda self, e: strposition_sql( 1785 self, e, func_name="CHARINDEX", supports_position=True 1786 ), 1787 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1788 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1789 exp.Stuff: rename_func("INSERT"), 1790 exp.StPoint: rename_func("ST_MAKEPOINT"), 1791 exp.TimeAdd: date_delta_sql("TIMEADD"), 1792 exp.TimeSlice: lambda self, e: self.func( 1793 "TIME_SLICE", 1794 e.this, 1795 e.expression, 1796 unit_to_str(e), 1797 e.args.get("kind"), 1798 ), 1799 exp.Timestamp: no_timestamp_sql, 1800 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1801 exp.TimestampDiff: lambda self, e: self.func( 1802 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1803 ), 1804 exp.TimestampTrunc: timestamptrunc_sql(), 1805 exp.TimeStrToTime: timestrtotime_sql, 1806 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1807 exp.ToArray: rename_func("TO_ARRAY"), 1808 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1809 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1810 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1811 exp.TsOrDsToDate: lambda self, e: self.func( 1812 f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e) 1813 ), 1814 exp.TsOrDsToTime: lambda self, e: self.func( 1815 f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e) 1816 ), 1817 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1818 exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")), 1819 exp.Uuid: rename_func("UUID_STRING"), 1820 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1821 exp.Booland: rename_func("BOOLAND"), 1822 exp.Boolor: rename_func("BOOLOR"), 1823 exp.WeekOfYear: rename_func("WEEKISO"), 1824 exp.YearOfWeek: rename_func("YEAROFWEEK"), 1825 exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"), 1826 exp.Xor: rename_func("BOOLXOR"), 1827 exp.ByteLength: rename_func("OCTET_LENGTH"), 1828 exp.Flatten: rename_func("ARRAY_FLATTEN"), 1829 exp.ArrayConcatAgg: lambda self, e: self.func( 1830 "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this) 1831 ), 1832 exp.SHA2Digest: lambda self, e: self.func( 1833 "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256) 1834 ), 1835 } 1836 1837 def nthvalue_sql(self, expression: exp.NthValue) -> str: 1838 result = self.func("NTH_VALUE", expression.this, expression.args.get("offset")) 1839 1840 from_first = expression.args.get("from_first") 1841 1842 if from_first is not None: 1843 if from_first: 1844 result = result + " FROM FIRST" 1845 else: 1846 result = result + " FROM LAST" 1847 1848 return result 1849 1850 SUPPORTED_JSON_PATH_PARTS = { 1851 exp.JSONPathKey, 1852 exp.JSONPathRoot, 1853 exp.JSONPathSubscript, 1854 } 1855 1856 TYPE_MAPPING = { 1857 **generator.Generator.TYPE_MAPPING, 1858 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1859 exp.DataType.Type.NESTED: "OBJECT", 1860 exp.DataType.Type.STRUCT: "OBJECT", 1861 exp.DataType.Type.TEXT: "VARCHAR", 1862 } 1863 1864 TOKEN_MAPPING = { 1865 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1866 } 1867 1868 PROPERTIES_LOCATION = { 1869 **generator.Generator.PROPERTIES_LOCATION, 1870 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1871 exp.LocationProperty: 
exp.Properties.Location.POST_WITH, 1872 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1873 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1874 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1875 } 1876 1877 UNSUPPORTED_VALUES_EXPRESSIONS = { 1878 exp.Map, 1879 exp.StarMap, 1880 exp.Struct, 1881 exp.VarMap, 1882 } 1883 1884 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1885 1886 def with_properties(self, properties: exp.Properties) -> str: 1887 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1888 1889 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1890 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1891 values_as_table = False 1892 1893 return super().values_sql(expression, values_as_table=values_as_table) 1894 1895 def datatype_sql(self, expression: exp.DataType) -> str: 1896 # Check if this is a FLOAT type nested inside a VECTOR type 1897 # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types 1898 # https://docs.snowflake.com/en/sql-reference/data-types-vector 1899 if expression.is_type(exp.DataType.Type.DOUBLE): 1900 parent = expression.parent 1901 if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR): 1902 # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE 1903 return "FLOAT" 1904 1905 expressions = expression.expressions 1906 if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES): 1907 for field_type in expressions: 1908 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1909 if isinstance(field_type, exp.DataType): 1910 return "OBJECT" 1911 if ( 1912 isinstance(field_type, exp.ColumnDef) 1913 and field_type.this 1914 and field_type.this.is_string 1915 ): 1916 # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides 1917 # converting 'foo' into an identifier, we also need to quote it because these 1918 # keys are case-sensitive. 
For example: 1919 # 1920 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1921 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1922 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1923 1924 return super().datatype_sql(expression) 1925 1926 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1927 return self.func( 1928 "TO_NUMBER", 1929 expression.this, 1930 expression.args.get("format"), 1931 expression.args.get("precision"), 1932 expression.args.get("scale"), 1933 ) 1934 1935 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1936 milli = expression.args.get("milli") 1937 if milli is not None: 1938 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1939 expression.set("nano", milli_to_nano) 1940 1941 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1942 1943 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1944 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1945 return self.func("TO_GEOGRAPHY", expression.this) 1946 if expression.is_type(exp.DataType.Type.GEOMETRY): 1947 return self.func("TO_GEOMETRY", expression.this) 1948 1949 return super().cast_sql(expression, safe_prefix=safe_prefix) 1950 1951 def trycast_sql(self, expression: exp.TryCast) -> str: 1952 value = expression.this 1953 1954 if value.type is None: 1955 from sqlglot.optimizer.annotate_types import annotate_types 1956 1957 value = annotate_types(value, dialect=self.dialect) 1958 1959 # Snowflake requires that TRY_CAST's value be a string 1960 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1961 # if we can deduce that the value is a string, then we can generate TRY_CAST 1962 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1963 return super().trycast_sql(expression) 1964 1965 return self.cast_sql(expression) 1966 1967 def log_sql(self, expression: exp.Log) -> str: 1968 if not expression.expression: 1969 return self.func("LN", expression.this) 1970 1971 return super().log_sql(expression) 1972 1973 def greatest_sql(self, expression: exp.Greatest) -> str: 1974 name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST" 1975 return self.func(name, expression.this, *expression.expressions) 1976 1977 def least_sql(self, expression: exp.Least) -> str: 1978 name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST" 1979 return self.func(name, expression.this, *expression.expressions) 1980 1981 def generator_sql(self, expression: exp.Generator) -> str: 1982 args = [] 1983 rowcount = expression.args.get("rowcount") 1984 time_limit = expression.args.get("time_limit") 1985 1986 if rowcount: 1987 args.append(exp.Kwarg(this=exp.var("ROWCOUNT"), expression=rowcount)) 1988 if time_limit: 1989 args.append(exp.Kwarg(this=exp.var("TIMELIMIT"), expression=time_limit)) 1990 1991 return self.func("GENERATOR", *args) 1992 1993 def unnest_sql(self, expression: exp.Unnest) -> str: 1994 unnest_alias = expression.args.get("alias") 1995 offset = expression.args.get("offset") 1996 1997 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1998 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1999 2000 columns = [ 2001 exp.to_identifier("seq"), 2002 exp.to_identifier("key"), 2003 exp.to_identifier("path"), 2004 offset.pop() if isinstance(offset, exp.Expression) else 
exp.to_identifier("index"), 2005 value, 2006 exp.to_identifier("this"), 2007 ] 2008 2009 if unnest_alias: 2010 unnest_alias.set("columns", columns) 2011 else: 2012 unnest_alias = exp.TableAlias(this="_u", columns=columns) 2013 2014 table_input = self.sql(expression.expressions[0]) 2015 if not table_input.startswith("INPUT =>"): 2016 table_input = f"INPUT => {table_input}" 2017 2018 expression_parent = expression.parent 2019 2020 explode = ( 2021 f"FLATTEN({table_input})" 2022 if isinstance(expression_parent, exp.Lateral) 2023 else f"TABLE(FLATTEN({table_input}))" 2024 ) 2025 alias = self.sql(unnest_alias) 2026 alias = f" AS {alias}" if alias else "" 2027 value = ( 2028 "" 2029 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 2030 else f"{value} FROM " 2031 ) 2032 2033 return f"{value}{explode}{alias}" 2034 2035 def show_sql(self, expression: exp.Show) -> str: 2036 terse = "TERSE " if expression.args.get("terse") else "" 2037 history = " HISTORY" if expression.args.get("history") else "" 2038 like = self.sql(expression, "like") 2039 like = f" LIKE {like}" if like else "" 2040 2041 scope = self.sql(expression, "scope") 2042 scope = f" {scope}" if scope else "" 2043 2044 scope_kind = self.sql(expression, "scope_kind") 2045 if scope_kind: 2046 scope_kind = f" IN {scope_kind}" 2047 2048 starts_with = self.sql(expression, "starts_with") 2049 if starts_with: 2050 starts_with = f" STARTS WITH {starts_with}" 2051 2052 limit = self.sql(expression, "limit") 2053 2054 from_ = self.sql(expression, "from_") 2055 if from_: 2056 from_ = f" FROM {from_}" 2057 2058 privileges = self.expressions(expression, key="privileges", flat=True) 2059 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 2060 2061 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 2062 2063 def describe_sql(self, expression: exp.Describe) -> str: 2064 # Default to table if kind is unknown 2065 kind_value = expression.args.get("kind") or "TABLE" 2066 kind = f" {kind_value}" if kind_value else "" 2067 this = f" {self.sql(expression, 'this')}" 2068 expressions = self.expressions(expression, flat=True) 2069 expressions = f" {expressions}" if expressions else "" 2070 return f"DESCRIBE{kind}{this}{expressions}" 2071 2072 def generatedasidentitycolumnconstraint_sql( 2073 self, expression: exp.GeneratedAsIdentityColumnConstraint 2074 ) -> str: 2075 start = expression.args.get("start") 2076 start = f" START {start}" if start else "" 2077 increment = expression.args.get("increment") 2078 increment = f" INCREMENT {increment}" if increment else "" 2079 2080 order = expression.args.get("order") 2081 if order is not None: 2082 order_clause = " ORDER" if order else " NOORDER" 2083 else: 2084 order_clause = "" 2085 2086 return f"AUTOINCREMENT{start}{increment}{order_clause}" 2087 2088 def cluster_sql(self, expression: exp.Cluster) -> str: 2089 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 2090 2091 def struct_sql(self, expression: exp.Struct) -> str: 2092 if len(expression.expressions) == 1: 2093 arg = expression.expressions[0] 2094 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 2095 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 2096 return f"{{{self.sql(expression.expressions[0])}}}" 2097 2098 keys = [] 2099 values = [] 2100 2101 for i, e in enumerate(expression.expressions): 2102 if isinstance(e, exp.PropertyEQ): 2103 keys.append( 2104 exp.Literal.string(e.name) if 
isinstance(e.this, exp.Identifier) else e.this 2105 ) 2106 values.append(e.expression) 2107 else: 2108 keys.append(exp.Literal.string(f"_{i}")) 2109 values.append(e) 2110 2111 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 2112 2113 @unsupported_args("weight", "accuracy") 2114 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 2115 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 2116 2117 def alterset_sql(self, expression: exp.AlterSet) -> str: 2118 exprs = self.expressions(expression, flat=True) 2119 exprs = f" {exprs}" if exprs else "" 2120 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 2121 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 2122 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 2123 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 2124 tag = self.expressions(expression, key="tag", flat=True) 2125 tag = f" TAG {tag}" if tag else "" 2126 2127 return f"SET{exprs}{file_format}{copy_options}{tag}" 2128 2129 def strtotime_sql(self, expression: exp.StrToTime): 2130 # target_type is stored as a DataType instance 2131 target_type = expression.args.get("target_type") 2132 2133 # Get the type enum from DataType instance or from type annotation 2134 if isinstance(target_type, exp.DataType): 2135 type_enum = target_type.this 2136 elif expression.type: 2137 type_enum = expression.type.this 2138 else: 2139 type_enum = exp.DataType.Type.TIMESTAMP 2140 2141 func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP") 2142 2143 return self.func( 2144 f"{'TRY_' if expression.args.get('safe') else ''}{func_name}", 2145 expression.this, 2146 self.format_time(expression), 2147 ) 2148 2149 def timestampsub_sql(self, expression: exp.TimestampSub): 2150 return self.sql( 2151 exp.TimestampAdd( 2152 this=expression.this, 2153 expression=expression.expression * -1, 2154 unit=expression.unit, 2155 ) 2156 ) 2157 2158 def jsonextract_sql(self, expression: exp.JSONExtract): 2159 this = expression.this 2160 2161 # JSON strings are valid coming from other dialects such as BQ so 2162 # for these cases we PARSE_JSON preemptively 2163 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 2164 "requires_json" 2165 ): 2166 this = exp.ParseJSON(this=this) 2167 2168 return self.func( 2169 "GET_PATH", 2170 this, 2171 expression.expression, 2172 ) 2173 2174 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 2175 this = expression.this 2176 if this.is_string: 2177 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 2178 2179 return self.func("TO_CHAR", this, self.format_time(expression)) 2180 2181 def datesub_sql(self, expression: exp.DateSub) -> str: 2182 value = expression.expression 2183 if value: 2184 value.replace(value * (-1)) 2185 else: 2186 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 2187 2188 return date_delta_sql("DATEADD")(self, expression) 2189 2190 def select_sql(self, expression: exp.Select) -> str: 2191 limit = expression.args.get("limit") 2192 offset = expression.args.get("offset") 2193 if offset and not limit: 2194 expression.limit(exp.Null(), copy=False) 2195 return super().select_sql(expression) 2196 2197 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 2198 is_materialized = expression.find(exp.MaterializedProperty) 2199 copy_grants_property = expression.find(exp.CopyGrantsProperty) 
2200 2201 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 2202 # For materialized views, COPY GRANTS is located *before* the columns list 2203 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 2204 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 2205 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 2206 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 2207 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 2208 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 2209 2210 this_name = self.sql(expression.this, "this") 2211 copy_grants = self.sql(copy_grants_property) 2212 this_schema = self.schema_columns_sql(expression.this) 2213 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 2214 2215 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 2216 2217 return super().createable_sql(expression, locations) 2218 2219 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 2220 this = expression.this 2221 2222 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 2223 # and add it later as part of the WITHIN GROUP clause 2224 order = this if isinstance(this, exp.Order) else None 2225 if order: 2226 expression.set("this", order.this.pop()) 2227 2228 expr_sql = super().arrayagg_sql(expression) 2229 2230 if order: 2231 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 2232 2233 return expr_sql 2234 2235 def array_sql(self, expression: exp.Array) -> str: 2236 expressions = expression.expressions 2237 2238 first_expr = seq_get(expressions, 0) 2239 if isinstance(first_expr, exp.Select): 2240 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2241 if first_expr.text("kind").upper() == "STRUCT": 2242 object_construct_args = [] 2243 for expr in first_expr.expressions: 2244 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2245 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2246 name = expr.this if isinstance(expr, exp.Alias) else expr 2247 2248 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2249 2250 array_agg = exp.ArrayAgg( 2251 this=_build_object_construct(args=object_construct_args) 2252 ) 2253 2254 first_expr.set("kind", None) 2255 first_expr.set("expressions", [array_agg]) 2256 2257 return self.sql(first_expr.subquery()) 2258 2259 return inline_array_sql(self, expression) 2260 2261 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2262 zone = self.sql(expression, "this") 2263 if not zone: 2264 return super().currentdate_sql(expression) 2265 2266 expr = exp.Cast( 2267 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2268 to=exp.DataType(this=exp.DataType.Type.DATE), 2269 ) 2270 return self.sql(expr) 2271 2272 def dot_sql(self, expression: exp.Dot) -> str: 2273 this = expression.this 2274 2275 if not this.type: 2276 from sqlglot.optimizer.annotate_types import annotate_types 2277 2278 this = annotate_types(this, dialect=self.dialect) 2279 2280 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2281 # Generate colon notation for the top level STRUCT 2282 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2283 2284 return super().dot_sql(expression) 2285 2286 def modelattribute_sql(self, 
expression: exp.ModelAttribute) -> str: 2287 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2288 2289 def format_sql(self, expression: exp.Format) -> str: 2290 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2291 return self.func("TO_CHAR", expression.expressions[0]) 2292 2293 return self.function_fallback_sql(expression) 2294 2295 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2296 # Set part_index to 1 if missing 2297 if not expression.args.get("delimiter"): 2298 expression.set("delimiter", exp.Literal.string(" ")) 2299 2300 if not expression.args.get("part_index"): 2301 expression.set("part_index", exp.Literal.number(1)) 2302 2303 return rename_func("SPLIT_PART")(self, expression) 2304 2305 def uniform_sql(self, expression: exp.Uniform) -> str: 2306 gen = expression.args.get("gen") 2307 seed = expression.args.get("seed") 2308 2309 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2310 if seed: 2311 gen = exp.Rand(this=seed) 2312 2313 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2314 if not gen: 2315 gen = exp.Rand() 2316 2317 return self.func("UNIFORM", expression.this, expression.expression, gen)
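As a quick, hedged illustration of the TRANSFORMS table above (not part of the source; exact renderings may vary by sqlglot version), transpiling into Snowflake exercises a couple of the listed mappings, e.g. exp.If -> IFF and exp.Levenshtein -> EDITDISTANCE:

import sqlglot

# IF(...) coming from DuckDB should be rendered as Snowflake's IFF(...), per the exp.If transform.
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', 'neg') FROM t", read="duckdb", write="snowflake")[0])

# LEVENSHTEIN maps to EDITDISTANCE, per the exp.Levenshtein transform.
print(sqlglot.transpile("SELECT LEVENSHTEIN('kitten', 'sitting')", read="duckdb", write="snowflake")[0])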
Specifies the strategy according to which identifiers should be normalized.
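A minimal sketch (not from the source) of what this strategy means for Snowflake, assuming sqlglot's normalize_identifiers rule: unquoted identifiers are upper-cased, quoted ones keep their casing.

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = sqlglot.parse_one('SELECT col_a, "col_b" FROM tbl', read="snowflake")

# Unquoted identifiers are normalized to upper case; the quoted "col_b" is preserved as-is.
print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))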
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
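A hedged sketch of observing this rewrite with sqlglot's qualify step (the rewrite itself is the one described above; identifier quoting and normalization applied by qualify may change the surface form):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
expr = sqlglot.parse_one(sql, read="snowflake")

# Under Snowflake, qualification pushes the CTE column alias c onto the projection,
# i.e. SUM(a) gains the alias c as in the rewritten query shown above.
print(qualify(expr, dialect="snowflake").sql(dialect="snowflake"))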
Whether Array update functions return NULL when the input array is NULL.
Whether alias references are allowed in JOIN ... ON clauses.
Most dialects do not support this, but Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
For example, in Snowflake: SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id -- VALID
Reference: https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
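A hypothetical sketch (table names and schema invented) of how this plays out during qualification, which consults the dialect before expanding alias references:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id"
expr = sqlglot.parse_one(sql, read="snowflake")

# Because Snowflake permits the alias reference in the ON clause, qualification
# accepts it and should expand user_id back to a.id in the qualified output.
print(qualify(expr, dialect="snowflake", schema={"a": {"id": "int"}, "b": {"id": "int"}}).sql(dialect="snowflake"))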
Whether LEAST/GREATEST functions ignore NULL values, e.g.:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
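A small, hedged sketch comparing the two groups via transpilation (exact output depends on the sqlglot version):

import sqlglot

# Postgres-style LEAST skips NULLs while Snowflake's propagates them, so sqlglot
# must pick a NULL-compatible rendering when crossing between the two groups.
print(sqlglot.transpile("SELECT LEAST(1, NULL, 2)", read="postgres", write="snowflake")[0])
print(sqlglot.transpile("SELECT LEAST(1, NULL, 2)", read="snowflake", write="postgres")[0])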
Associates this dialect's time formats with their equivalent Python strftime formats.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
785 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 786 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 787 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 788 return super().can_quote(identifier, identify) and not ( 789 isinstance(identifier.parent, exp.Table) 790 and not identifier.quoted 791 and identifier.name.lower() == "dual" 792 )
Checks if an identifier can be quoted
Arguments:
- identifier: The identifier to check.
- identify:
- True: Always returns True except for certain cases.
- "safe": Only returns True if the identifier is case-insensitive.
- "unsafe": Only returns True if the identifier is case-sensitive.
Returns:
Whether the given text can be identified.
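A hypothetical usage sketch of the DUAL carve-out shown in the source above, assuming the Snowflake dialect class exported by this module:

import sqlglot
from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()
table = sqlglot.parse_one("SELECT 1 FROM dual", read="snowflake").find(exp.Table)

# Even with identify=True, the unquoted DUAL identifier is reported as unquotable,
# because Snowflake treats the bare DUAL keyword specially.
print(dialect.can_quote(table.this, identify=True))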
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
794 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 795 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 796 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens
798 class Parser(parser.Parser): 799 IDENTIFY_PIVOT_STRINGS = True 800 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 801 COLON_IS_VARIANT_EXTRACT = True 802 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 803 804 ID_VAR_TOKENS = { 805 *parser.Parser.ID_VAR_TOKENS, 806 TokenType.EXCEPT, 807 TokenType.MATCH_CONDITION, 808 } 809 810 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 811 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 812 813 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 814 815 NO_PAREN_FUNCTIONS = { 816 **parser.Parser.NO_PAREN_FUNCTIONS, 817 TokenType.CURRENT_TIME: exp.Localtime, 818 } 819 820 FUNCTIONS = { 821 **parser.Parser.FUNCTIONS, 822 "ADD_MONTHS": lambda args: exp.AddMonths( 823 this=seq_get(args, 0), 824 expression=seq_get(args, 1), 825 preserve_end_of_month=True, 826 ), 827 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 828 "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)), 829 "APPROX_TOP_K": _build_approx_top_k, 830 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 831 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 832 this=seq_get(args, 1), 833 expression=seq_get(args, 0), 834 ensure_variant=False, 835 check_null=True, 836 ), 837 "ARRAY_DISTINCT": lambda args: exp.ArrayDistinct( 838 this=seq_get(args, 0), 839 check_null=True, 840 ), 841 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 842 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 843 start=seq_get(args, 0), 844 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 845 step=seq_get(args, 2), 846 ), 847 "ARRAY_SORT": exp.SortArray.from_arg_list, 848 "ARRAY_FLATTEN": exp.Flatten.from_arg_list, 849 "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 850 "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 851 "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 852 "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 853 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 854 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 855 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 856 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 857 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 858 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 859 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 860 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 861 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 862 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 863 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 864 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 865 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 866 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 867 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 868 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 869 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 870 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 871 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 872 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 873 "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list, 874 "BOOLAND": lambda args: exp.Booland( 875 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 876 ), 877 "BOOLOR": lambda args: exp.Boolor( 878 this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True 879 ), 880 "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True), 881 "BOOLXOR": lambda args: exp.Xor( 882 this=seq_get(args, 0), 
expression=seq_get(args, 1), round_input=True 883 ), 884 "CORR": lambda args: exp.Corr( 885 this=seq_get(args, 0), 886 expression=seq_get(args, 1), 887 null_on_zero_variance=True, 888 ), 889 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 890 "DATEFROMPARTS": _build_date_from_parts, 891 "DATE_FROM_PARTS": _build_date_from_parts, 892 "DATE_TRUNC": _date_trunc_to_time, 893 "DATEADD": _build_date_time_add(exp.DateAdd), 894 "DATEDIFF": _build_datediff, 895 "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True), 896 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 897 "DIV0": _build_if_from_div0, 898 "DIV0NULL": _build_if_from_div0null, 899 "EDITDISTANCE": lambda args: exp.Levenshtein( 900 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 901 ), 902 "FLATTEN": exp.Explode.from_arg_list, 903 "GENERATOR": _build_generator, 904 "GET": exp.GetExtract.from_arg_list, 905 "GETDATE": exp.CurrentTimestamp.from_arg_list, 906 "GET_PATH": lambda args, dialect: exp.JSONExtract( 907 this=seq_get(args, 0), 908 expression=dialect.to_json_path(seq_get(args, 1)), 909 requires_json=True, 910 ), 911 "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest( 912 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 913 ), 914 "LEAST_IGNORE_NULLS": lambda args: exp.Least( 915 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 916 ), 917 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 918 "IFF": exp.If.from_arg_list, 919 "JAROWINKLER_SIMILARITY": lambda args: exp.JarowinklerSimilarity( 920 this=seq_get(args, 0), 921 expression=seq_get(args, 1), 922 case_insensitive=True, 923 ), 924 "MD5_HEX": exp.MD5.from_arg_list, 925 "MD5_BINARY": exp.MD5Digest.from_arg_list, 926 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 927 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 928 "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True), 929 "LAST_DAY": lambda args: exp.LastDay( 930 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 931 ), 932 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 933 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 934 "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 935 "NULLIFZERO": _build_if_from_nullifzero, 936 "OBJECT_CONSTRUCT": _build_object_construct, 937 "OBJECT_KEYS": exp.JSONKeys.from_arg_list, 938 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 939 "PARSE_URL": lambda args: exp.ParseUrl( 940 this=seq_get(args, 0), permissive=seq_get(args, 1) 941 ), 942 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 943 "REGEXP_LIKE": _build_regexp_like, 944 "REGEXP_REPLACE": _build_regexp_replace, 945 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 946 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 947 "REPLACE": build_replace_with_optional_replacement, 948 "RLIKE": _build_regexp_like, 949 "ROUND": _build_round, 950 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 951 "SHA1_HEX": exp.SHA.from_arg_list, 952 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 953 "SHA2_HEX": exp.SHA2.from_arg_list, 954 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 955 "STDDEV_SAMP": exp.Stddev.from_arg_list, 956 "STRTOK": _build_strtok, 957 "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True), 958 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 959 "TIMEADD": _build_date_time_add(exp.TimeAdd), 960 "TIMEDIFF": _build_datediff, 961 
"TIME_FROM_PARTS": lambda args: exp.TimeFromParts( 962 hour=seq_get(args, 0), 963 min=seq_get(args, 1), 964 sec=seq_get(args, 2), 965 nano=seq_get(args, 3), 966 overflow=True, 967 ), 968 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 969 "TIMESTAMPDIFF": _build_datediff, 970 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 971 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 972 "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts, 973 "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts, 974 "TRUNC": lambda args, dialect: build_trunc( 975 args, dialect, date_trunc_requires_part=False 976 ), 977 "TRUNCATE": lambda args, dialect: build_trunc( 978 args, dialect, date_trunc_requires_part=False 979 ), 980 "TRY_DECRYPT": lambda args: exp.Decrypt( 981 this=seq_get(args, 0), 982 passphrase=seq_get(args, 1), 983 aad=seq_get(args, 2), 984 encryption_method=seq_get(args, 3), 985 safe=True, 986 ), 987 "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw( 988 this=seq_get(args, 0), 989 key=seq_get(args, 1), 990 iv=seq_get(args, 2), 991 aad=seq_get(args, 3), 992 encryption_method=seq_get(args, 4), 993 aead=seq_get(args, 5), 994 safe=True, 995 ), 996 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 997 "TRY_TO_BINARY": lambda args: exp.ToBinary( 998 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 999 ), 1000 "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True), 1001 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 1002 **dict.fromkeys( 1003 ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number 1004 ), 1005 "TRY_TO_DOUBLE": lambda args: exp.ToDouble( 1006 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 1007 ), 1008 "TRY_TO_FILE": lambda args: exp.ToFile( 1009 this=seq_get(args, 0), path=seq_get(args, 1), safe=True 1010 ), 1011 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 1012 "TRY_TO_TIMESTAMP": _build_datetime( 1013 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 1014 ), 1015 "TRY_TO_TIMESTAMP_LTZ": _build_datetime( 1016 "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True 1017 ), 1018 "TRY_TO_TIMESTAMP_NTZ": _build_datetime( 1019 "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True 1020 ), 1021 "TRY_TO_TIMESTAMP_TZ": _build_datetime( 1022 "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True 1023 ), 1024 "TO_CHAR": build_timetostr_or_tochar, 1025 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 1026 **dict.fromkeys( 1027 ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"), 1028 lambda args: exp.ToNumber( 1029 this=seq_get(args, 0), 1030 format=seq_get(args, 1), 1031 precision=seq_get(args, 2), 1032 scale=seq_get(args, 3), 1033 ), 1034 ), 1035 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 1036 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 1037 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 1038 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ), 1039 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 1040 "TO_VARCHAR": build_timetostr_or_tochar, 1041 "TO_JSON": exp.JSONFormat.from_arg_list, 1042 "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list, 1043 "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list, 1044 "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list, 1045 "VECTOR_L2_DISTANCE": 
exp.EuclideanDistance.from_arg_list, 1046 "ZEROIFNULL": _build_if_from_zeroifnull, 1047 "LIKE": build_like(exp.Like), 1048 "ILIKE": build_like(exp.ILike), 1049 "SEARCH": _build_search, 1050 "SKEW": exp.Skewness.from_arg_list, 1051 "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 1052 "WEEKISO": exp.WeekOfYear.from_arg_list, 1053 "WEEKOFYEAR": exp.Week.from_arg_list, 1054 } 1055 FUNCTIONS.pop("PREDICT") 1056 1057 FUNCTION_PARSERS = { 1058 **parser.Parser.FUNCTION_PARSERS, 1059 "DATE_PART": lambda self: self._parse_date_part(), 1060 "DIRECTORY": lambda self: self._parse_directory(), 1061 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 1062 "LISTAGG": lambda self: self._parse_string_agg(), 1063 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 1064 } 1065 FUNCTION_PARSERS.pop("TRIM") 1066 1067 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 1068 1069 ALTER_PARSERS = { 1070 **parser.Parser.ALTER_PARSERS, 1071 "MODIFY": lambda self: self._parse_alter_table_alter(), 1072 "SESSION": lambda self: self._parse_alter_session(), 1073 "UNSET": lambda self: self.expression( 1074 exp.Set, 1075 tag=self._match_text_seq("TAG"), 1076 expressions=self._parse_csv(self._parse_id_var), 1077 unset=True, 1078 ), 1079 } 1080 1081 STATEMENT_PARSERS = { 1082 **parser.Parser.STATEMENT_PARSERS, 1083 TokenType.GET: lambda self: self._parse_get(), 1084 TokenType.PUT: lambda self: self._parse_put(), 1085 TokenType.SHOW: lambda self: self._parse_show(), 1086 } 1087 1088 PROPERTY_PARSERS = { 1089 **parser.Parser.PROPERTY_PARSERS, 1090 "CREDENTIALS": lambda self: self._parse_credentials_property(), 1091 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 1092 "LOCATION": lambda self: self._parse_location_property(), 1093 "TAG": lambda self: self._parse_tag(), 1094 "USING": lambda self: self._match_text_seq("TEMPLATE") 1095 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 1096 } 1097 1098 TYPE_CONVERTERS = { 1099 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 1100 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 1101 } 1102 1103 SHOW_PARSERS = { 1104 "DATABASES": _show_parser("DATABASES"), 1105 "TERSE DATABASES": _show_parser("DATABASES"), 1106 "SCHEMAS": _show_parser("SCHEMAS"), 1107 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 1108 "OBJECTS": _show_parser("OBJECTS"), 1109 "TERSE OBJECTS": _show_parser("OBJECTS"), 1110 "TABLES": _show_parser("TABLES"), 1111 "TERSE TABLES": _show_parser("TABLES"), 1112 "VIEWS": _show_parser("VIEWS"), 1113 "TERSE VIEWS": _show_parser("VIEWS"), 1114 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1115 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 1116 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1117 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1118 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1119 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1120 "SEQUENCES": _show_parser("SEQUENCES"), 1121 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 1122 "STAGES": _show_parser("STAGES"), 1123 "COLUMNS": _show_parser("COLUMNS"), 1124 "USERS": _show_parser("USERS"), 1125 "TERSE USERS": _show_parser("USERS"), 1126 "FILE FORMATS": _show_parser("FILE FORMATS"), 1127 "FUNCTIONS": _show_parser("FUNCTIONS"), 1128 "PROCEDURES": _show_parser("PROCEDURES"), 1129 "WAREHOUSES": _show_parser("WAREHOUSES"), 1130 } 1131 1132 CONSTRAINT_PARSERS = { 1133 **parser.Parser.CONSTRAINT_PARSERS, 1134 "WITH": lambda self: self._parse_with_constraint(), 1135 "MASKING": lambda 
self: self._parse_with_constraint(), 1136 "PROJECTION": lambda self: self._parse_with_constraint(), 1137 "TAG": lambda self: self._parse_with_constraint(), 1138 } 1139 1140 STAGED_FILE_SINGLE_TOKENS = { 1141 TokenType.DOT, 1142 TokenType.MOD, 1143 TokenType.SLASH, 1144 } 1145 1146 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 1147 1148 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 1149 1150 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 1151 1152 LAMBDAS = { 1153 **parser.Parser.LAMBDAS, 1154 TokenType.ARROW: lambda self, expressions: self.expression( 1155 exp.Lambda, 1156 this=self._replace_lambda( 1157 self._parse_assignment(), 1158 expressions, 1159 ), 1160 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 1161 ), 1162 } 1163 1164 COLUMN_OPERATORS = { 1165 **parser.Parser.COLUMN_OPERATORS, 1166 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 1167 exp.ModelAttribute, this=this, expression=attr 1168 ), 1169 } 1170 1171 def _parse_directory(self) -> exp.DirectoryStage: 1172 table = self._parse_table_parts() 1173 1174 if isinstance(table, exp.Table): 1175 table = table.this 1176 1177 return self.expression(exp.DirectoryStage, this=table) 1178 1179 def _parse_use(self) -> exp.Use: 1180 if self._match_text_seq("SECONDARY", "ROLES"): 1181 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 1182 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 1183 return self.expression( 1184 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 1185 ) 1186 1187 return super()._parse_use() 1188 1189 def _negate_range( 1190 self, this: t.Optional[exp.Expression] = None 1191 ) -> t.Optional[exp.Expression]: 1192 if not this: 1193 return this 1194 1195 query = this.args.get("query") 1196 if isinstance(this, exp.In) and isinstance(query, exp.Query): 1197 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 1198 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 1199 # which can produce different results (most likely a SnowFlake bug). 
1200 # 1201 # https://docs.snowflake.com/en/sql-reference/functions/in 1202 # Context: https://github.com/tobymao/sqlglot/issues/3890 1203 return self.expression( 1204 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 1205 ) 1206 1207 return self.expression(exp.Not, this=this) 1208 1209 def _parse_tag(self) -> exp.Tags: 1210 return self.expression( 1211 exp.Tags, 1212 expressions=self._parse_wrapped_csv(self._parse_property), 1213 ) 1214 1215 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 1216 if self._prev.token_type != TokenType.WITH: 1217 self._retreat(self._index - 1) 1218 1219 if self._match_text_seq("MASKING", "POLICY"): 1220 policy = self._parse_column() 1221 return self.expression( 1222 exp.MaskingPolicyColumnConstraint, 1223 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1224 expressions=self._match(TokenType.USING) 1225 and self._parse_wrapped_csv(self._parse_id_var), 1226 ) 1227 if self._match_text_seq("PROJECTION", "POLICY"): 1228 policy = self._parse_column() 1229 return self.expression( 1230 exp.ProjectionPolicyColumnConstraint, 1231 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1232 ) 1233 if self._match(TokenType.TAG): 1234 return self._parse_tag() 1235 1236 return None 1237 1238 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1239 if self._match(TokenType.TAG): 1240 return self._parse_tag() 1241 1242 return super()._parse_with_property() 1243 1244 def _parse_create(self) -> exp.Create | exp.Command: 1245 expression = super()._parse_create() 1246 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 1247 # Replace the Table node with the enclosed Identifier 1248 expression.this.replace(expression.this.this) 1249 1250 return expression 1251 1252 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 1253 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 1254 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 1255 this = self._parse_var() or self._parse_type() 1256 1257 if not this: 1258 return None 1259 1260 # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr) 1261 expression = ( 1262 self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise() 1263 ) 1264 return self.expression( 1265 exp.Extract, this=map_date_part(this, self.dialect), expression=expression 1266 ) 1267 1268 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1269 if is_map: 1270 # Keys are strings in Snowflake's objects, see also: 1271 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1272 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1273 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1274 1275 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1276 1277 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1278 lateral = super()._parse_lateral() 1279 if not lateral: 1280 return lateral 1281 1282 if isinstance(lateral.this, exp.Explode): 1283 table_alias = lateral.args.get("alias") 1284 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1285 if table_alias and not table_alias.args.get("columns"): 1286 table_alias.set("columns", columns) 1287 elif not table_alias: 1288 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1289 1290 return lateral 1291 1292 def 
_parse_table_parts( 1293 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1294 ) -> exp.Table: 1295 # https://docs.snowflake.com/en/user-guide/querying-stage 1296 if self._match(TokenType.STRING, advance=False): 1297 table = self._parse_string() 1298 elif self._match_text_seq("@", advance=False): 1299 table = self._parse_location_path() 1300 else: 1301 table = None 1302 1303 if table: 1304 file_format = None 1305 pattern = None 1306 1307 wrapped = self._match(TokenType.L_PAREN) 1308 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1309 if self._match_text_seq("FILE_FORMAT", "=>"): 1310 file_format = self._parse_string() or super()._parse_table_parts( 1311 is_db_reference=is_db_reference 1312 ) 1313 elif self._match_text_seq("PATTERN", "=>"): 1314 pattern = self._parse_string() 1315 else: 1316 break 1317 1318 self._match(TokenType.COMMA) 1319 1320 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1321 else: 1322 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1323 1324 return table 1325 1326 def _parse_table( 1327 self, 1328 schema: bool = False, 1329 joins: bool = False, 1330 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1331 parse_bracket: bool = False, 1332 is_db_reference: bool = False, 1333 parse_partition: bool = False, 1334 consume_pipe: bool = False, 1335 ) -> t.Optional[exp.Expression]: 1336 table = super()._parse_table( 1337 schema=schema, 1338 joins=joins, 1339 alias_tokens=alias_tokens, 1340 parse_bracket=parse_bracket, 1341 is_db_reference=is_db_reference, 1342 parse_partition=parse_partition, 1343 ) 1344 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1345 table_from_rows = table.this 1346 for arg in exp.TableFromRows.arg_types: 1347 if arg != "this": 1348 table_from_rows.set(arg, table.args.get(arg)) 1349 1350 table = table_from_rows 1351 1352 return table 1353 1354 def _parse_id_var( 1355 self, 1356 any_token: bool = True, 1357 tokens: t.Optional[t.Collection[TokenType]] = None, 1358 ) -> t.Optional[exp.Expression]: 1359 if self._match_text_seq("IDENTIFIER", "("): 1360 identifier = ( 1361 super()._parse_id_var(any_token=any_token, tokens=tokens) 1362 or self._parse_string() 1363 ) 1364 self._match_r_paren() 1365 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1366 1367 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1368 1369 def _parse_show_snowflake(self, this: str) -> exp.Show: 1370 scope = None 1371 scope_kind = None 1372 1373 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1374 # which is syntactically valid but has no effect on the output 1375 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1376 1377 history = self._match_text_seq("HISTORY") 1378 1379 like = self._parse_string() if self._match(TokenType.LIKE) else None 1380 1381 if self._match(TokenType.IN): 1382 if self._match_text_seq("ACCOUNT"): 1383 scope_kind = "ACCOUNT" 1384 elif self._match_text_seq("CLASS"): 1385 scope_kind = "CLASS" 1386 scope = self._parse_table_parts() 1387 elif self._match_text_seq("APPLICATION"): 1388 scope_kind = "APPLICATION" 1389 if self._match_text_seq("PACKAGE"): 1390 scope_kind += " PACKAGE" 1391 scope = self._parse_table_parts() 1392 elif self._match_set(self.DB_CREATABLES): 1393 scope_kind = self._prev.text.upper() 1394 if self._curr: 1395 scope = self._parse_table_parts() 1396 elif self._curr: 1397 scope_kind = "SCHEMA" if this in 
self.SCHEMA_KINDS else "TABLE" 1398 scope = self._parse_table_parts() 1399 1400 return self.expression( 1401 exp.Show, 1402 terse=terse, 1403 this=this, 1404 history=history, 1405 like=like, 1406 scope=scope, 1407 scope_kind=scope_kind, 1408 starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1409 limit=self._parse_limit(), 1410 from_=self._parse_string() if self._match(TokenType.FROM) else None, 1411 privileges=self._match_text_seq("WITH", "PRIVILEGES") 1412 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1413 ) 1414 1415 def _parse_put(self) -> exp.Put | exp.Command: 1416 if self._curr.token_type != TokenType.STRING: 1417 return self._parse_as_command(self._prev) 1418 1419 return self.expression( 1420 exp.Put, 1421 this=self._parse_string(), 1422 target=self._parse_location_path(), 1423 properties=self._parse_properties(), 1424 ) 1425 1426 def _parse_get(self) -> t.Optional[exp.Expression]: 1427 start = self._prev 1428 1429 # If we detect GET( then we need to parse a function, not a statement 1430 if self._match(TokenType.L_PAREN): 1431 self._retreat(self._index - 2) 1432 return self._parse_expression() 1433 1434 target = self._parse_location_path() 1435 1436 # Parse as command if unquoted file path 1437 if self._curr.token_type == TokenType.URI_START: 1438 return self._parse_as_command(start) 1439 1440 return self.expression( 1441 exp.Get, 1442 this=self._parse_string(), 1443 target=target, 1444 properties=self._parse_properties(), 1445 ) 1446 1447 def _parse_location_property(self) -> exp.LocationProperty: 1448 self._match(TokenType.EQ) 1449 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1450 1451 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1452 # Parse either a subquery or a staged file 1453 return ( 1454 self._parse_select(table=True, parse_subquery_alias=False) 1455 if self._match(TokenType.L_PAREN, advance=False) 1456 else self._parse_table_parts() 1457 ) 1458 1459 def _parse_location_path(self) -> exp.Var: 1460 start = self._curr 1461 self._advance_any(ignore_reserved=True) 1462 1463 # We avoid consuming a comma token because external tables like @foo and @bar 1464 # can be joined in a query with a comma separator, as well as closing paren 1465 # in case of subqueries 1466 while self._is_connected() and not self._match_set( 1467 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1468 ): 1469 self._advance_any(ignore_reserved=True) 1470 1471 return exp.var(self._find_sql(start, self._prev)) 1472 1473 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1474 this = super()._parse_lambda_arg() 1475 1476 if not this: 1477 return this 1478 1479 typ = self._parse_types() 1480 1481 if typ: 1482 return self.expression(exp.Cast, this=this, to=typ) 1483 1484 return this 1485 1486 def _parse_foreign_key(self) -> exp.ForeignKey: 1487 # inlineFK, the REFERENCES columns are implied 1488 if self._match(TokenType.REFERENCES, advance=False): 1489 return self.expression(exp.ForeignKey) 1490 1491 # outoflineFK, explicitly names the columns 1492 return super()._parse_foreign_key() 1493 1494 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1495 self._match(TokenType.EQ) 1496 if self._match(TokenType.L_PAREN, advance=False): 1497 expressions = self._parse_wrapped_options() 1498 else: 1499 expressions = [self._parse_format_name()] 1500 1501 return self.expression( 1502 exp.FileFormatProperty, 1503 expressions=expressions, 1504 ) 1505 1506 def 
_parse_credentials_property(self) -> exp.CredentialsProperty: 1507 return self.expression( 1508 exp.CredentialsProperty, 1509 expressions=self._parse_wrapped_options(), 1510 ) 1511 1512 def _parse_semantic_view(self) -> exp.SemanticView: 1513 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1514 1515 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1516 if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")): 1517 keyword = self._prev.text.lower() 1518 kwargs[keyword] = self._parse_csv( 1519 lambda: self._parse_alias(self._parse_disjunction(), explicit=True) 1520 ) 1521 elif self._match_text_seq("WHERE"): 1522 kwargs["where"] = self._parse_expression() 1523 else: 1524 self.raise_error("Expecting ) or encountered unexpected keyword") 1525 break 1526 1527 return self.expression(exp.SemanticView, **kwargs) 1528 1529 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 1530 set = super()._parse_set(unset=unset, tag=tag) 1531 1532 if isinstance(set, exp.Set): 1533 for expr in set.expressions: 1534 if isinstance(expr, exp.SetItem): 1535 expr.set("kind", "VARIABLE") 1536 return set 1537 1538 def _parse_window( 1539 self, this: t.Optional[exp.Expression], alias: bool = False 1540 ) -> t.Optional[exp.Expression]: 1541 if isinstance(this, exp.NthValue): 1542 if self._match_text_seq("FROM"): 1543 if self._match_texts(("FIRST", "LAST")): 1544 from_first = self._prev.text.upper() == "FIRST" 1545 this.set("from_first", from_first) 1546 1547 result = super()._parse_window(this, alias) 1548 1549 return result
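The overrides above add parsing for Snowflake-specific constructs such as staged file references (_parse_table_parts), SHOW statements (_parse_show_snowflake) and PUT/GET commands. A minimal, hedged sketch of exercising them through the public API follows (the example SQL is illustrative; the exact round-tripped output may vary by sqlglot version):

import sqlglot

# Staged file reference with FILE_FORMAT/PATTERN options, handled by _parse_table_parts
ast = sqlglot.parse_one(
    "SELECT * FROM @my_stage/data (FILE_FORMAT => 'my_csv_format', PATTERN => '.*[.]csv')",
    read="snowflake",
)
print(ast.sql(dialect="snowflake"))

# SHOW statement, handled by _parse_show_snowflake
show = sqlglot.parse_one("SHOW TERSE SCHEMAS LIKE '%RAW%' IN DATABASE my_db", read="snowflake")
print(show.sql(dialect="snowflake"))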
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
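These error-handling options are typically passed through the top-level API rather than by constructing the parser directly; a small sketch under that assumption:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level / max_errors are forwarded to the dialect's Parser
    sqlglot.parse_one("SELECT FROM WHERE", read="snowflake", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)  # at most max_errors entries (default 3)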
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- raise_error
- validate_expression
- reset
- errors
- error_level
- error_message_context
- max_errors
- dialect
- sql
- parse
- parse_into
- check_errors
- expression
- parse_set_operation
- build_cast
1551 class Tokenizer(tokens.Tokenizer): 1552 STRING_ESCAPES = ["\\", "'"] 1553 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1554 RAW_STRINGS = ["$$"] 1555 COMMENTS = ["--", "//", ("/*", "*/")] 1556 NESTED_COMMENTS = False 1557 1558 KEYWORDS = { 1559 **tokens.Tokenizer.KEYWORDS, 1560 "BYTEINT": TokenType.INT, 1561 "FILE://": TokenType.URI_START, 1562 "FILE FORMAT": TokenType.FILE_FORMAT, 1563 "GET": TokenType.GET, 1564 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1565 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1566 "MINUS": TokenType.EXCEPT, 1567 "NCHAR VARYING": TokenType.VARCHAR, 1568 "PUT": TokenType.PUT, 1569 "REMOVE": TokenType.COMMAND, 1570 "RM": TokenType.COMMAND, 1571 "SAMPLE": TokenType.TABLE_SAMPLE, 1572 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1573 "SQL_DOUBLE": TokenType.DOUBLE, 1574 "SQL_VARCHAR": TokenType.VARCHAR, 1575 "STAGE": TokenType.STAGE, 1576 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1577 "STREAMLIT": TokenType.STREAMLIT, 1578 "TAG": TokenType.TAG, 1579 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1580 "TOP": TokenType.TOP, 1581 "WAREHOUSE": TokenType.WAREHOUSE, 1582 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float 1583 # FLOAT is a synonym for DOUBLE in Snowflake 1584 "FLOAT": TokenType.DOUBLE, 1585 } 1586 KEYWORDS.pop("/*+") 1587 1588 SINGLE_TOKENS = { 1589 **tokens.Tokenizer.SINGLE_TOKENS, 1590 "$": TokenType.PARAMETER, 1591 "!": TokenType.EXCLAMATION, 1592 } 1593 1594 VAR_SINGLE_TOKENS = {"$"} 1595 1596 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
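A few tokenizer-level behaviors from the keyword table above: // starts a line comment, MINUS is treated as EXCEPT, and FLOAT tokenizes as DOUBLE. A small illustrative sketch (the printed output is approximate and may differ by version, especially comment placement):

import sqlglot

sql = "SELECT a FROM t // keep this comment\nMINUS\nSELECT a FROM s"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
# e.g. -> SELECT a FROM t /* keep this comment */ EXCEPT SELECT a FROM s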
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- tokenize
- sql
- size
- tokens
1598 class Generator(generator.Generator): 1599 PARAMETER_TOKEN = "$" 1600 MATCHED_BY_SOURCE = False 1601 SINGLE_STRING_INTERVAL = True 1602 JOIN_HINTS = False 1603 TABLE_HINTS = False 1604 QUERY_HINTS = False 1605 AGGREGATE_FILTER_SUPPORTED = False 1606 SUPPORTS_TABLE_COPY = False 1607 COLLATE_IS_FUNC = True 1608 LIMIT_ONLY_LITERALS = True 1609 JSON_KEY_VALUE_PAIR_SEP = "," 1610 INSERT_OVERWRITE = " OVERWRITE INTO" 1611 STRUCT_DELIMITER = ("(", ")") 1612 COPY_PARAMS_ARE_WRAPPED = False 1613 COPY_PARAMS_EQ_REQUIRED = True 1614 STAR_EXCEPT = "EXCLUDE" 1615 SUPPORTS_EXPLODING_PROJECTIONS = False 1616 ARRAY_CONCAT_IS_VAR_LEN = False 1617 SUPPORTS_CONVERT_TIMEZONE = True 1618 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1619 SUPPORTS_MEDIAN = True 1620 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1621 SUPPORTS_DECODE_CASE = True 1622 IS_BOOL_ALLOWED = False 1623 DIRECTED_JOINS = True 1624 1625 TRANSFORMS = { 1626 **generator.Generator.TRANSFORMS, 1627 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1628 exp.ArgMax: rename_func("MAX_BY"), 1629 exp.ArgMin: rename_func("MIN_BY"), 1630 exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]), 1631 exp.ArrayConcat: array_concat_sql("ARRAY_CAT"), 1632 exp.ArrayAppend: array_append_sql("ARRAY_APPEND"), 1633 exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"), 1634 exp.ArrayContains: lambda self, e: self.func( 1635 "ARRAY_CONTAINS", 1636 e.expression 1637 if e.args.get("ensure_variant") is False 1638 else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False), 1639 e.this, 1640 ), 1641 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1642 exp.AtTimeZone: lambda self, e: self.func( 1643 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1644 ), 1645 exp.BitwiseOr: rename_func("BITOR"), 1646 exp.BitwiseXor: rename_func("BITXOR"), 1647 exp.BitwiseAnd: rename_func("BITAND"), 1648 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1649 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1650 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1651 exp.BitwiseNot: rename_func("BITNOT"), 1652 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1653 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1654 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1655 exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE") 1656 if e.args.get("sysdate") 1657 else self.function_fallback_sql(e), 1658 exp.CurrentSchemas: lambda self, e: self.func("CURRENT_SCHEMAS"), 1659 exp.Localtime: lambda self, e: self.func("CURRENT_TIME", e.this) 1660 if e.this 1661 else "CURRENT_TIME", 1662 exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this) 1663 if e.this 1664 else "CURRENT_TIMESTAMP", 1665 exp.DateAdd: date_delta_sql("DATEADD"), 1666 exp.DateDiff: date_delta_sql("DATEDIFF"), 1667 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1668 exp.DatetimeDiff: timestampdiff_sql, 1669 exp.DateStrToDate: datestrtodate_sql, 1670 exp.Decrypt: lambda self, e: self.func( 1671 f"{'TRY_' if e.args.get('safe') else ''}DECRYPT", 1672 e.this, 1673 e.args.get("passphrase"), 1674 e.args.get("aad"), 1675 e.args.get("encryption_method"), 1676 ), 1677 exp.DecryptRaw: lambda self, e: self.func( 1678 f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW", 1679 e.this, 1680 e.args.get("key"), 1681 e.args.get("iv"), 1682 e.args.get("aad"), 1683 e.args.get("encryption_method"), 1684 e.args.get("aead"), 1685 ), 1686 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1687 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1688 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1689 
exp.DayOfYear: rename_func("DAYOFYEAR"), 1690 exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"), 1691 exp.Explode: rename_func("FLATTEN"), 1692 exp.Extract: lambda self, e: self.func( 1693 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1694 ), 1695 exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"), 1696 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1697 exp.FileFormatProperty: lambda self, 1698 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1699 exp.FromTimeZone: lambda self, e: self.func( 1700 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1701 ), 1702 exp.GenerateSeries: lambda self, e: self.func( 1703 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1704 ), 1705 exp.GetExtract: rename_func("GET"), 1706 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1707 exp.If: if_sql(name="IFF", false_value="NULL"), 1708 exp.JSONExtractArray: _json_extract_value_array_sql, 1709 exp.JSONExtractScalar: lambda self, e: self.func( 1710 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1711 ), 1712 exp.JSONKeys: rename_func("OBJECT_KEYS"), 1713 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1714 exp.JSONPathRoot: lambda *_: "", 1715 exp.JSONValueArray: _json_extract_value_array_sql, 1716 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1717 rename_func("EDITDISTANCE") 1718 ), 1719 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1720 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1721 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1722 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1723 exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"), 1724 exp.MakeInterval: no_make_interval_sql, 1725 exp.Max: max_or_greatest, 1726 exp.Min: min_or_least, 1727 exp.ParseJSON: lambda self, e: self.func( 1728 f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this 1729 ), 1730 exp.ToBinary: lambda self, e: self.func( 1731 f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format") 1732 ), 1733 exp.ToBoolean: lambda self, e: self.func( 1734 f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this 1735 ), 1736 exp.ToDouble: lambda self, e: self.func( 1737 f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format") 1738 ), 1739 exp.ToFile: lambda self, e: self.func( 1740 f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path") 1741 ), 1742 exp.ToNumber: lambda self, e: self.func( 1743 f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER", 1744 e.this, 1745 e.args.get("format"), 1746 e.args.get("precision"), 1747 e.args.get("scale"), 1748 ), 1749 exp.JSONFormat: rename_func("TO_JSON"), 1750 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1751 exp.PercentileCont: transforms.preprocess( 1752 [transforms.add_within_group_for_percentiles] 1753 ), 1754 exp.PercentileDisc: transforms.preprocess( 1755 [transforms.add_within_group_for_percentiles] 1756 ), 1757 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1758 exp.RegexpExtract: _regexpextract_sql, 1759 exp.RegexpExtractAll: _regexpextract_sql, 1760 exp.RegexpILike: _regexpilike_sql, 1761 exp.Rand: rename_func("RANDOM"), 1762 exp.Select: transforms.preprocess( 1763 [ 1764 transforms.eliminate_window_clause, 1765 transforms.eliminate_distinct_on, 1766 transforms.explode_projection_to_unnest(), 1767 transforms.eliminate_semi_and_anti_joins, 1768 
_transform_generate_date_array, 1769 _qualify_unnested_columns, 1770 _eliminate_dot_variant_lookup, 1771 ] 1772 ), 1773 exp.SHA: rename_func("SHA1"), 1774 exp.SHA1Digest: rename_func("SHA1_BINARY"), 1775 exp.MD5Digest: rename_func("MD5_BINARY"), 1776 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1777 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1778 exp.LowerHex: rename_func("TO_CHAR"), 1779 exp.SortArray: rename_func("ARRAY_SORT"), 1780 exp.Skewness: rename_func("SKEW"), 1781 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1782 exp.StartsWith: rename_func("STARTSWITH"), 1783 exp.EndsWith: rename_func("ENDSWITH"), 1784 exp.StrPosition: lambda self, e: strposition_sql( 1785 self, e, func_name="CHARINDEX", supports_position=True 1786 ), 1787 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1788 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1789 exp.Stuff: rename_func("INSERT"), 1790 exp.StPoint: rename_func("ST_MAKEPOINT"), 1791 exp.TimeAdd: date_delta_sql("TIMEADD"), 1792 exp.TimeSlice: lambda self, e: self.func( 1793 "TIME_SLICE", 1794 e.this, 1795 e.expression, 1796 unit_to_str(e), 1797 e.args.get("kind"), 1798 ), 1799 exp.Timestamp: no_timestamp_sql, 1800 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1801 exp.TimestampDiff: lambda self, e: self.func( 1802 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1803 ), 1804 exp.TimestampTrunc: timestamptrunc_sql(), 1805 exp.TimeStrToTime: timestrtotime_sql, 1806 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1807 exp.ToArray: rename_func("TO_ARRAY"), 1808 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1809 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1810 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1811 exp.TsOrDsToDate: lambda self, e: self.func( 1812 f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e) 1813 ), 1814 exp.TsOrDsToTime: lambda self, e: self.func( 1815 f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e) 1816 ), 1817 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1818 exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")), 1819 exp.Uuid: rename_func("UUID_STRING"), 1820 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1821 exp.Booland: rename_func("BOOLAND"), 1822 exp.Boolor: rename_func("BOOLOR"), 1823 exp.WeekOfYear: rename_func("WEEKISO"), 1824 exp.YearOfWeek: rename_func("YEAROFWEEK"), 1825 exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"), 1826 exp.Xor: rename_func("BOOLXOR"), 1827 exp.ByteLength: rename_func("OCTET_LENGTH"), 1828 exp.Flatten: rename_func("ARRAY_FLATTEN"), 1829 exp.ArrayConcatAgg: lambda self, e: self.func( 1830 "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this) 1831 ), 1832 exp.SHA2Digest: lambda self, e: self.func( 1833 "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256) 1834 ), 1835 } 1836 1837 def nthvalue_sql(self, expression: exp.NthValue) -> str: 1838 result = self.func("NTH_VALUE", expression.this, expression.args.get("offset")) 1839 1840 from_first = expression.args.get("from_first") 1841 1842 if from_first is not None: 1843 if from_first: 1844 result = result + " FROM FIRST" 1845 else: 1846 result = result + " FROM LAST" 1847 1848 return result 1849 1850 SUPPORTED_JSON_PATH_PARTS = { 1851 exp.JSONPathKey, 1852 exp.JSONPathRoot, 1853 exp.JSONPathSubscript, 1854 } 1855 1856 TYPE_MAPPING = { 1857 **generator.Generator.TYPE_MAPPING, 1858 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1859 
exp.DataType.Type.NESTED: "OBJECT", 1860 exp.DataType.Type.STRUCT: "OBJECT", 1861 exp.DataType.Type.TEXT: "VARCHAR", 1862 } 1863 1864 TOKEN_MAPPING = { 1865 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1866 } 1867 1868 PROPERTIES_LOCATION = { 1869 **generator.Generator.PROPERTIES_LOCATION, 1870 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1871 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1872 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1873 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1874 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1875 } 1876 1877 UNSUPPORTED_VALUES_EXPRESSIONS = { 1878 exp.Map, 1879 exp.StarMap, 1880 exp.Struct, 1881 exp.VarMap, 1882 } 1883 1884 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1885 1886 def with_properties(self, properties: exp.Properties) -> str: 1887 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1888 1889 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1890 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1891 values_as_table = False 1892 1893 return super().values_sql(expression, values_as_table=values_as_table) 1894 1895 def datatype_sql(self, expression: exp.DataType) -> str: 1896 # Check if this is a FLOAT type nested inside a VECTOR type 1897 # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types 1898 # https://docs.snowflake.com/en/sql-reference/data-types-vector 1899 if expression.is_type(exp.DataType.Type.DOUBLE): 1900 parent = expression.parent 1901 if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR): 1902 # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE 1903 return "FLOAT" 1904 1905 expressions = expression.expressions 1906 if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES): 1907 for field_type in expressions: 1908 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1909 if isinstance(field_type, exp.DataType): 1910 return "OBJECT" 1911 if ( 1912 isinstance(field_type, exp.ColumnDef) 1913 and field_type.this 1914 and field_type.this.is_string 1915 ): 1916 # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides 1917 # converting 'foo' into an identifier, we also need to quote it because these 1918 # keys are case-sensitive. 
For example: 1919 # 1920 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1921 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1922 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1923 1924 return super().datatype_sql(expression) 1925 1926 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1927 return self.func( 1928 "TO_NUMBER", 1929 expression.this, 1930 expression.args.get("format"), 1931 expression.args.get("precision"), 1932 expression.args.get("scale"), 1933 ) 1934 1935 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1936 milli = expression.args.get("milli") 1937 if milli is not None: 1938 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1939 expression.set("nano", milli_to_nano) 1940 1941 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1942 1943 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1944 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1945 return self.func("TO_GEOGRAPHY", expression.this) 1946 if expression.is_type(exp.DataType.Type.GEOMETRY): 1947 return self.func("TO_GEOMETRY", expression.this) 1948 1949 return super().cast_sql(expression, safe_prefix=safe_prefix) 1950 1951 def trycast_sql(self, expression: exp.TryCast) -> str: 1952 value = expression.this 1953 1954 if value.type is None: 1955 from sqlglot.optimizer.annotate_types import annotate_types 1956 1957 value = annotate_types(value, dialect=self.dialect) 1958 1959 # Snowflake requires that TRY_CAST's value be a string 1960 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1961 # if we can deduce that the value is a string, then we can generate TRY_CAST 1962 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1963 return super().trycast_sql(expression) 1964 1965 return self.cast_sql(expression) 1966 1967 def log_sql(self, expression: exp.Log) -> str: 1968 if not expression.expression: 1969 return self.func("LN", expression.this) 1970 1971 return super().log_sql(expression) 1972 1973 def greatest_sql(self, expression: exp.Greatest) -> str: 1974 name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST" 1975 return self.func(name, expression.this, *expression.expressions) 1976 1977 def least_sql(self, expression: exp.Least) -> str: 1978 name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST" 1979 return self.func(name, expression.this, *expression.expressions) 1980 1981 def generator_sql(self, expression: exp.Generator) -> str: 1982 args = [] 1983 rowcount = expression.args.get("rowcount") 1984 time_limit = expression.args.get("time_limit") 1985 1986 if rowcount: 1987 args.append(exp.Kwarg(this=exp.var("ROWCOUNT"), expression=rowcount)) 1988 if time_limit: 1989 args.append(exp.Kwarg(this=exp.var("TIMELIMIT"), expression=time_limit)) 1990 1991 return self.func("GENERATOR", *args) 1992 1993 def unnest_sql(self, expression: exp.Unnest) -> str: 1994 unnest_alias = expression.args.get("alias") 1995 offset = expression.args.get("offset") 1996 1997 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1998 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1999 2000 columns = [ 2001 exp.to_identifier("seq"), 2002 exp.to_identifier("key"), 2003 exp.to_identifier("path"), 2004 offset.pop() if isinstance(offset, exp.Expression) else 
exp.to_identifier("index"), 2005 value, 2006 exp.to_identifier("this"), 2007 ] 2008 2009 if unnest_alias: 2010 unnest_alias.set("columns", columns) 2011 else: 2012 unnest_alias = exp.TableAlias(this="_u", columns=columns) 2013 2014 table_input = self.sql(expression.expressions[0]) 2015 if not table_input.startswith("INPUT =>"): 2016 table_input = f"INPUT => {table_input}" 2017 2018 expression_parent = expression.parent 2019 2020 explode = ( 2021 f"FLATTEN({table_input})" 2022 if isinstance(expression_parent, exp.Lateral) 2023 else f"TABLE(FLATTEN({table_input}))" 2024 ) 2025 alias = self.sql(unnest_alias) 2026 alias = f" AS {alias}" if alias else "" 2027 value = ( 2028 "" 2029 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 2030 else f"{value} FROM " 2031 ) 2032 2033 return f"{value}{explode}{alias}" 2034 2035 def show_sql(self, expression: exp.Show) -> str: 2036 terse = "TERSE " if expression.args.get("terse") else "" 2037 history = " HISTORY" if expression.args.get("history") else "" 2038 like = self.sql(expression, "like") 2039 like = f" LIKE {like}" if like else "" 2040 2041 scope = self.sql(expression, "scope") 2042 scope = f" {scope}" if scope else "" 2043 2044 scope_kind = self.sql(expression, "scope_kind") 2045 if scope_kind: 2046 scope_kind = f" IN {scope_kind}" 2047 2048 starts_with = self.sql(expression, "starts_with") 2049 if starts_with: 2050 starts_with = f" STARTS WITH {starts_with}" 2051 2052 limit = self.sql(expression, "limit") 2053 2054 from_ = self.sql(expression, "from_") 2055 if from_: 2056 from_ = f" FROM {from_}" 2057 2058 privileges = self.expressions(expression, key="privileges", flat=True) 2059 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 2060 2061 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 2062 2063 def describe_sql(self, expression: exp.Describe) -> str: 2064 # Default to table if kind is unknown 2065 kind_value = expression.args.get("kind") or "TABLE" 2066 kind = f" {kind_value}" if kind_value else "" 2067 this = f" {self.sql(expression, 'this')}" 2068 expressions = self.expressions(expression, flat=True) 2069 expressions = f" {expressions}" if expressions else "" 2070 return f"DESCRIBE{kind}{this}{expressions}" 2071 2072 def generatedasidentitycolumnconstraint_sql( 2073 self, expression: exp.GeneratedAsIdentityColumnConstraint 2074 ) -> str: 2075 start = expression.args.get("start") 2076 start = f" START {start}" if start else "" 2077 increment = expression.args.get("increment") 2078 increment = f" INCREMENT {increment}" if increment else "" 2079 2080 order = expression.args.get("order") 2081 if order is not None: 2082 order_clause = " ORDER" if order else " NOORDER" 2083 else: 2084 order_clause = "" 2085 2086 return f"AUTOINCREMENT{start}{increment}{order_clause}" 2087 2088 def cluster_sql(self, expression: exp.Cluster) -> str: 2089 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 2090 2091 def struct_sql(self, expression: exp.Struct) -> str: 2092 if len(expression.expressions) == 1: 2093 arg = expression.expressions[0] 2094 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 2095 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 2096 return f"{{{self.sql(expression.expressions[0])}}}" 2097 2098 keys = [] 2099 values = [] 2100 2101 for i, e in enumerate(expression.expressions): 2102 if isinstance(e, exp.PropertyEQ): 2103 keys.append( 2104 exp.Literal.string(e.name) if 
isinstance(e.this, exp.Identifier) else e.this 2105 ) 2106 values.append(e.expression) 2107 else: 2108 keys.append(exp.Literal.string(f"_{i}")) 2109 values.append(e) 2110 2111 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 2112 2113 @unsupported_args("weight", "accuracy") 2114 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 2115 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 2116 2117 def alterset_sql(self, expression: exp.AlterSet) -> str: 2118 exprs = self.expressions(expression, flat=True) 2119 exprs = f" {exprs}" if exprs else "" 2120 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 2121 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 2122 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 2123 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 2124 tag = self.expressions(expression, key="tag", flat=True) 2125 tag = f" TAG {tag}" if tag else "" 2126 2127 return f"SET{exprs}{file_format}{copy_options}{tag}" 2128 2129 def strtotime_sql(self, expression: exp.StrToTime): 2130 # target_type is stored as a DataType instance 2131 target_type = expression.args.get("target_type") 2132 2133 # Get the type enum from DataType instance or from type annotation 2134 if isinstance(target_type, exp.DataType): 2135 type_enum = target_type.this 2136 elif expression.type: 2137 type_enum = expression.type.this 2138 else: 2139 type_enum = exp.DataType.Type.TIMESTAMP 2140 2141 func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP") 2142 2143 return self.func( 2144 f"{'TRY_' if expression.args.get('safe') else ''}{func_name}", 2145 expression.this, 2146 self.format_time(expression), 2147 ) 2148 2149 def timestampsub_sql(self, expression: exp.TimestampSub): 2150 return self.sql( 2151 exp.TimestampAdd( 2152 this=expression.this, 2153 expression=expression.expression * -1, 2154 unit=expression.unit, 2155 ) 2156 ) 2157 2158 def jsonextract_sql(self, expression: exp.JSONExtract): 2159 this = expression.this 2160 2161 # JSON strings are valid coming from other dialects such as BQ so 2162 # for these cases we PARSE_JSON preemptively 2163 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 2164 "requires_json" 2165 ): 2166 this = exp.ParseJSON(this=this) 2167 2168 return self.func( 2169 "GET_PATH", 2170 this, 2171 expression.expression, 2172 ) 2173 2174 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 2175 this = expression.this 2176 if this.is_string: 2177 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 2178 2179 return self.func("TO_CHAR", this, self.format_time(expression)) 2180 2181 def datesub_sql(self, expression: exp.DateSub) -> str: 2182 value = expression.expression 2183 if value: 2184 value.replace(value * (-1)) 2185 else: 2186 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 2187 2188 return date_delta_sql("DATEADD")(self, expression) 2189 2190 def select_sql(self, expression: exp.Select) -> str: 2191 limit = expression.args.get("limit") 2192 offset = expression.args.get("offset") 2193 if offset and not limit: 2194 expression.limit(exp.Null(), copy=False) 2195 return super().select_sql(expression) 2196 2197 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 2198 is_materialized = expression.find(exp.MaterializedProperty) 2199 copy_grants_property = expression.find(exp.CopyGrantsProperty) 
2200 2201 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 2202 # For materialized views, COPY GRANTS is located *before* the columns list 2203 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 2204 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 2205 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 2206 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 2207 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 2208 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 2209 2210 this_name = self.sql(expression.this, "this") 2211 copy_grants = self.sql(copy_grants_property) 2212 this_schema = self.schema_columns_sql(expression.this) 2213 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 2214 2215 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 2216 2217 return super().createable_sql(expression, locations) 2218 2219 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 2220 this = expression.this 2221 2222 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 2223 # and add it later as part of the WITHIN GROUP clause 2224 order = this if isinstance(this, exp.Order) else None 2225 if order: 2226 expression.set("this", order.this.pop()) 2227 2228 expr_sql = super().arrayagg_sql(expression) 2229 2230 if order: 2231 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 2232 2233 return expr_sql 2234 2235 def array_sql(self, expression: exp.Array) -> str: 2236 expressions = expression.expressions 2237 2238 first_expr = seq_get(expressions, 0) 2239 if isinstance(first_expr, exp.Select): 2240 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2241 if first_expr.text("kind").upper() == "STRUCT": 2242 object_construct_args = [] 2243 for expr in first_expr.expressions: 2244 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2245 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2246 name = expr.this if isinstance(expr, exp.Alias) else expr 2247 2248 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2249 2250 array_agg = exp.ArrayAgg( 2251 this=_build_object_construct(args=object_construct_args) 2252 ) 2253 2254 first_expr.set("kind", None) 2255 first_expr.set("expressions", [array_agg]) 2256 2257 return self.sql(first_expr.subquery()) 2258 2259 return inline_array_sql(self, expression) 2260 2261 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2262 zone = self.sql(expression, "this") 2263 if not zone: 2264 return super().currentdate_sql(expression) 2265 2266 expr = exp.Cast( 2267 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2268 to=exp.DataType(this=exp.DataType.Type.DATE), 2269 ) 2270 return self.sql(expr) 2271 2272 def dot_sql(self, expression: exp.Dot) -> str: 2273 this = expression.this 2274 2275 if not this.type: 2276 from sqlglot.optimizer.annotate_types import annotate_types 2277 2278 this = annotate_types(this, dialect=self.dialect) 2279 2280 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2281 # Generate colon notation for the top level STRUCT 2282 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2283 2284 return super().dot_sql(expression) 2285 2286 def modelattribute_sql(self, 
expression: exp.ModelAttribute) -> str: 2287 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2288 2289 def format_sql(self, expression: exp.Format) -> str: 2290 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2291 return self.func("TO_CHAR", expression.expressions[0]) 2292 2293 return self.function_fallback_sql(expression) 2294 2295 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2296 # Set part_index to 1 if missing 2297 if not expression.args.get("delimiter"): 2298 expression.set("delimiter", exp.Literal.string(" ")) 2299 2300 if not expression.args.get("part_index"): 2301 expression.set("part_index", exp.Literal.number(1)) 2302 2303 return rename_func("SPLIT_PART")(self, expression) 2304 2305 def uniform_sql(self, expression: exp.Uniform) -> str: 2306 gen = expression.args.get("gen") 2307 seed = expression.args.get("seed") 2308 2309 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2310 if seed: 2311 gen = exp.Rand(this=seed) 2312 2313 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2314 if not gen: 2315 gen = exp.Rand() 2316 2317 return self.func("UNIFORM", expression.this, expression.expression, gen)
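Many of the TRANSFORMS above are simple renames or argument reshuffles applied when generating Snowflake SQL. A hedged sketch of a couple of them via the top-level API (source dialects and outputs are illustrative):

import sqlglot

# exp.If is generated as IFF (if_sql(name="IFF", ...))
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', 'neg')", read="duckdb", write="snowflake")[0])
# e.g. -> SELECT IFF(x > 0, 'pos', 'neg')

# exp.Levenshtein is generated as EDITDISTANCE (rename_func)
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b)", read="postgres", write="snowflake")[0])
# e.g. -> SELECT EDITDISTANCE(a, b)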
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
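These options are usually supplied through sqlglot.transpile or Expression.sql rather than by instantiating the Generator directly; a small sketch under that assumption:

import sqlglot

sql = "select col_a, col_b from my_table where col_a > 1"
print(sqlglot.transpile(sql, write="snowflake", pretty=True, identify=True)[0])
# Pretty-printed, fully quoted Snowflake SQL, roughly:
# SELECT
#   "col_a",
#   "col_b"
# FROM "my_table"
# WHERE
#   "col_a" > 1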
Inherited Members
- sqlglot.generator.Generator: the remaining attributes and methods are inherited unchanged from the base Generator class; see its documentation for the full list.