sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, jsonpath, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 build_timetostr_or_tochar, 10 build_like, 11 binary_from_function, 12 build_default_decimal_type, 13 build_replace_with_optional_replacement, 14 date_delta_sql, 15 date_trunc_to_time, 16 datestrtodate_sql, 17 build_formatted_time, 18 if_sql, 19 inline_array_sql, 20 max_or_greatest, 21 min_or_least, 22 rename_func, 23 timestamptrunc_sql, 24 timestrtotime_sql, 25 unit_to_str, 26 var_map_sql, 27 map_date_part, 28 no_timestamp_sql, 29 strposition_sql, 30 timestampdiff_sql, 31 no_make_interval_sql, 32 groupconcat_sql, 33) 34from sqlglot.generator import unsupported_args 35from sqlglot.helper import find_new_name, flatten, is_date_unit, is_int, seq_get 36from sqlglot.optimizer.scope import build_scope, find_all_in_scope 37from sqlglot.tokens import TokenType 38from sqlglot.typing.snowflake import EXPRESSION_METADATA 39 40if t.TYPE_CHECKING: 41 from sqlglot._typing import E, B 42 43 44# Timestamp types used in _build_datetime 45TIMESTAMP_TYPES = { 46 exp.DataType.Type.TIMESTAMP: "TO_TIMESTAMP", 47 exp.DataType.Type.TIMESTAMPLTZ: "TO_TIMESTAMP_LTZ", 48 exp.DataType.Type.TIMESTAMPNTZ: "TO_TIMESTAMP_NTZ", 49 exp.DataType.Type.TIMESTAMPTZ: "TO_TIMESTAMP_TZ", 50} 51 52 53def _build_strtok(args: t.List) -> exp.SplitPart: 54 # Add default delimiter (space) if missing - per Snowflake docs 55 if len(args) == 1: 56 args.append(exp.Literal.string(" ")) 57 58 # Add default part_index (1) if missing 59 if len(args) == 2: 60 args.append(exp.Literal.number(1)) 61 62 return exp.SplitPart.from_arg_list(args) 63 64 65def _build_approx_top_k(args: t.List) -> exp.ApproxTopK: 66 """ 67 Normalizes APPROX_TOP_K arguments to match Snowflake semantics. 
def _build_date_from_parts(args: t.List) -> exp.DateFromParts:
    """Build DATE_FROM_PARTS(year, month, day); Snowflake allows part overflow."""
    return exp.DateFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        allow_overflow=True,
    )


def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    """Return a builder for TO_DATE / TO_TIME / TO_TIMESTAMP* functions.

    When safe=True the builder handles the TRY_* variants, which return NULL
    instead of erroring on invalid input.
    """

    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, (exp.Literal, exp.Neg)) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                if safe:
                    return exp.TryCast(
                        this=value, to=exp.DataType.build(kind), requires_string=True
                    )
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind in TIMESTAMP_TYPES:
                if not safe and (int_scale_or_fmt or (int_value and scale_or_fmt is None)):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable. Also, numeric-looking strings with
                    # format strings (e.g., TO_TIMESTAMP('20240115', 'YYYYMMDD')) should
                    # use StrToTime, not UnixToTime.
                    unix_expr = exp.UnixToTime(this=value, scale=scale_or_fmt)
                    unix_expr.set("target_type", exp.DataType.build(kind, dialect="snowflake"))
                    return unix_expr

                if scale_or_fmt and not int_scale_or_fmt:
                    # Format string provided (e.g., 'YYYY-MM-DD'), use StrToTime
                    strtotime_expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    strtotime_expr.set("safe", safe)
                    strtotime_expr.set(
                        "target_type", exp.DataType.build(kind, dialect="snowflake")
                    )
                    return strtotime_expr

            # Handle DATE/TIME with format strings - allow int_value if a format string is provided
            has_format_string = scale_or_fmt and not int_scale_or_fmt
            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and (
                not int_value or has_format_string
            ):
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        # Fall back to an opaque function call when we can't safely transpile it
        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Build OBJECT_CONSTRUCT either as a StarMap (for `*`) or a Struct of key/value pairs."""
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v)
            for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    """Build DATEDIFF(unit, start, end); Snowflake counts date-part boundaries crossed."""
    return exp.DateDiff(
        this=seq_get(args, 2),
        expression=seq_get(args, 1),
        unit=map_date_part(seq_get(args, 0)),
        date_part_boundary=True,
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Return a builder mapping (unit, amount, value) onto the given add expression type."""

    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    """Return a builder for Snowflake's bitwise functions (BITAND, BITOR, ...)."""

    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            # BITAND/BITOR/BITXOR accept an optional third `padside` argument
            if expr_type in (exp.BitwiseAnd, exp.BitwiseOr, exp.BitwiseXor):
                return expr_type(
                    this=seq_get(args, 0), expression=seq_get(args, 1), padside=seq_get(args, 2)
                )
            return exp.Anonymous(this=name, expressions=args)

        result = binary_from_function(expr_type)(args)

        # Snowflake specifies INT128 for bitwise shifts
        if expr_type in (exp.BitwiseLeftShift, exp.BitwiseRightShift):
            result.set("requires_int128", True)

        return result

    return _builder
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    """DIV0(a, b): 0 when b = 0 and a is not NULL, otherwise a / b."""
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    return exp.If(
        this=cond,
        true=exp.Literal.number(0),
        false=exp.Div(this=lhs, expression=rhs),
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0null
def _build_if_from_div0null(args: t.List) -> exp.If:
    """DIV0NULL(a, b): 0 when the divisor is 0 OR NULL, otherwise a / b."""
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).or_(
        exp.Is(this=rhs, expression=exp.null())
    )
    return exp.If(
        this=cond,
        true=exp.Literal.number(0),
        false=exp.Div(this=lhs, expression=rhs),
    )


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    """ZEROIFNULL(x): 0 when x IS NULL, otherwise x."""
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


def _build_search(args: t.List) -> exp.Search:
    """Build SEARCH(data, query [, kwarg...]), forwarding named args as kwargs."""
    kwargs = {
        "this": seq_get(args, 0),
        "expression": seq_get(args, 1),
        **{arg.name.lower(): arg for arg in args[2:] if isinstance(arg, exp.Kwarg)},
    }
    return exp.Search(**kwargs)


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    """NULLIFZERO(x): NULL when x = 0, otherwise x."""
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Render RegexpILike as REGEXP_LIKE, forcing the case-insensitive 'i' flag."""
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    """Build REGEXP_REPLACE, defaulting the replacement to the empty string."""
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Return a parser callback that delegates to `_parse_show_snowflake`."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    """Build DATE_TRUNC, normalizing the unit and marking when the input type is preserved."""
    trunc = date_trunc_to_time(args)
    unit = map_date_part(trunc.args["unit"])
    trunc.set("unit", unit)

    is_time_input = trunc.this.is_type(exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ)
    if (isinstance(trunc, exp.TimestampTrunc) and is_date_unit(unit) or is_time_input) or (
        isinstance(trunc, exp.DateTrunc) and not is_date_unit(unit)
    ):
        trunc.set("input_type_preserved", True)

    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression
def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    """Strip inner type parameters from nested types in CREATE, unless the table is Iceberg."""
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        # e.g. ARRAY<INT> -> ARRAY; Snowflake's native tables don't keep element types
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    """Rewrite UNNEST(GENERATE_DATE_ARRAY(...)) into Snowflake's ARRAY_GENERATE_RANGE idiom.

    Only handles a step of exactly 1 interval unit; other shapes are left untouched.
    """
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            # Project the derived date wherever the sequence column was referenced
            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    """Transform GENERATE_DATE_ARRAY usages in a SELECT so they can be generated for Snowflake."""
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Snowflake], E]:
    """Return a builder for REGEXP_SUBSTR / REGEXP_EXTRACT_ALL style functions."""

    def _builder(args: t.List, dialect: Snowflake) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
            **(
                {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
                if expr_type is exp.RegexpExtract
                else {}
            ),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    """Render RegexpExtract(All) as REGEXP_SUBSTR / REGEXP_EXTRACT_ALL.

    Other dialects don't support all of the following parameters, so we need to
    generate default values as necessary to ensure the transpilation is correct.
    """
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also default value) which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    func_name = (
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL"
    )
    return self.func(
        func_name,
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    """Render JSONValueArray / JSONExtractArray as TRANSFORM over a JSON extraction."""
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    """Qualify unqualified columns that come from UNNEST sources in a SELECT."""
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                # Bail out entirely rather than producing a partially-qualified tree
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression


def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    """Rewrite references to single-column UNNEST aliases as bracket (VARIANT) lookups."""
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    """Build TimestampFromParts with support for both syntaxes:
    1. TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second [, nanosecond] [, time_zone])
    2. TIMESTAMP_FROM_PARTS(date_expr, time_expr) - Snowflake specific
    """
    if len(args) == 2:
        return exp.TimestampFromParts(this=seq_get(args, 0), expression=seq_get(args, 1))

    return exp.TimestampFromParts.from_arg_list(args)
TIMESTAMP_FROM_PARTS(date_expr, time_expr) - Snowflake specific 604 """ 605 if len(args) == 2: 606 return exp.TimestampFromParts(this=seq_get(args, 0), expression=seq_get(args, 1)) 607 608 return exp.TimestampFromParts.from_arg_list(args) 609 610 611def _build_round(args: t.List) -> exp.Round: 612 """ 613 Build Round expression, unwrapping Snowflake's named parameters. 614 615 Maps EXPR => this, SCALE => decimals, ROUNDING_MODE => truncate. 616 617 Note: Snowflake does not support mixing named and positional arguments. 618 Arguments are either all named or all positional. 619 """ 620 kwarg_map = {"EXPR": "this", "SCALE": "decimals", "ROUNDING_MODE": "truncate"} 621 round_args = {} 622 positional_keys = ["this", "decimals", "truncate"] 623 positional_idx = 0 624 625 for arg in args: 626 if isinstance(arg, exp.Kwarg): 627 key = arg.this.name.upper() 628 round_key = kwarg_map.get(key) 629 if round_key: 630 round_args[round_key] = arg.expression 631 else: 632 if positional_idx < len(positional_keys): 633 round_args[positional_keys[positional_idx]] = arg 634 positional_idx += 1 635 636 expression = exp.Round(**round_args) 637 expression.set("casts_non_integer_decimals", True) 638 return expression 639 640 641def _build_try_to_number(args: t.List[exp.Expression]) -> exp.Expression: 642 return exp.ToNumber( 643 this=seq_get(args, 0), 644 format=seq_get(args, 1), 645 precision=seq_get(args, 2), 646 scale=seq_get(args, 3), 647 safe=True, 648 ) 649 650 651class Snowflake(Dialect): 652 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 653 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 654 NULL_ORDERING = "nulls_are_large" 655 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 656 SUPPORTS_USER_DEFINED_TYPES = False 657 SUPPORTS_SEMI_ANTI_JOIN = False 658 PREFER_CTE_ALIAS_COLUMN = True 659 TABLESAMPLE_SIZE_IS_PERCENT = True 660 COPY_PARAMS_ARE_CSV = False 661 ARRAY_AGG_INCLUDES_NULLS = None 662 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 663 
TRY_CAST_REQUIRES_STRING = True 664 SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True 665 LEAST_GREATEST_IGNORES_NULLS = False 666 667 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 668 669 # https://docs.snowflake.com/en/en/sql-reference/functions/initcap 670 INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>' 671 672 TIME_MAPPING = { 673 "YYYY": "%Y", 674 "yyyy": "%Y", 675 "YY": "%y", 676 "yy": "%y", 677 "MMMM": "%B", 678 "mmmm": "%B", 679 "MON": "%b", 680 "mon": "%b", 681 "MM": "%m", 682 "mm": "%m", 683 "DD": "%d", 684 "dd": "%-d", 685 "DY": "%a", 686 "dy": "%w", 687 "HH24": "%H", 688 "hh24": "%H", 689 "HH12": "%I", 690 "hh12": "%I", 691 "MI": "%M", 692 "mi": "%M", 693 "SS": "%S", 694 "ss": "%S", 695 "FF6": "%f", 696 "ff6": "%f", 697 # Seems like Snowflake treats AM/PM in the format string as equivalent, 698 # only the time (stamp) value's AM/PM affects the output 699 "AM": "%p", 700 "am": "%p", 701 "PM": "%p", 702 "pm": "%p", 703 } 704 705 DATE_PART_MAPPING = { 706 **Dialect.DATE_PART_MAPPING, 707 "ISOWEEK": "WEEKISO", 708 } 709 710 PSEUDOCOLUMNS = {"LEVEL"} 711 712 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 713 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 714 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 715 return super().can_quote(identifier, identify) and not ( 716 isinstance(identifier.parent, exp.Table) 717 and not identifier.quoted 718 and identifier.name.lower() == "dual" 719 ) 720 721 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 722 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 723 SINGLE_TOKENS.pop("$") 724 725 class Parser(parser.Parser): 726 IDENTIFY_PIVOT_STRINGS = True 727 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 728 COLON_IS_VARIANT_EXTRACT = True 729 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 730 731 ID_VAR_TOKENS = { 732 *parser.Parser.ID_VAR_TOKENS, 733 TokenType.EXCEPT, 734 TokenType.MATCH_CONDITION, 735 } 736 737 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 738 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 739 740 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 741 742 FUNCTIONS = { 743 **parser.Parser.FUNCTIONS, 744 "ADD_MONTHS": lambda args: exp.AddMonths( 745 this=seq_get(args, 0), 746 expression=seq_get(args, 1), 747 preserve_end_of_month=True, 748 ), 749 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 750 "APPROX_TOP_K": _build_approx_top_k, 751 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 752 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 753 this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False 754 ), 755 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 756 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 757 start=seq_get(args, 0), 758 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 759 step=seq_get(args, 2), 760 ), 761 "ARRAY_SORT": exp.SortArray.from_arg_list, 762 "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 763 "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"), 764 "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 
765 "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 766 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 767 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 768 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 769 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 770 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 771 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 772 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 773 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 774 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 775 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 776 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 777 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 778 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 779 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 780 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 781 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 782 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 783 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 784 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 785 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 786 "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list, 787 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 788 "CORR": lambda args: exp.Corr( 789 this=seq_get(args, 0), 790 expression=seq_get(args, 1), 791 null_on_zero_variance=True, 792 ), 793 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 794 "DATEFROMPARTS": _build_date_from_parts, 795 "DATE_FROM_PARTS": _build_date_from_parts, 796 "DATE_TRUNC": _date_trunc_to_time, 797 "DATEADD": _build_date_time_add(exp.DateAdd), 798 "DATEDIFF": _build_datediff, 799 "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True), 800 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 801 "DIV0": _build_if_from_div0, 802 "DIV0NULL": _build_if_from_div0null, 803 "EDITDISTANCE": lambda args: exp.Levenshtein( 804 this=seq_get(args, 0), 
expression=seq_get(args, 1), max_dist=seq_get(args, 2) 805 ), 806 "FLATTEN": exp.Explode.from_arg_list, 807 "GET": exp.GetExtract.from_arg_list, 808 "GETDATE": exp.CurrentTimestamp.from_arg_list, 809 "GET_PATH": lambda args, dialect: exp.JSONExtract( 810 this=seq_get(args, 0), 811 expression=dialect.to_json_path(seq_get(args, 1)), 812 requires_json=True, 813 ), 814 "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest( 815 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 816 ), 817 "LEAST_IGNORE_NULLS": lambda args: exp.Least( 818 this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True 819 ), 820 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 821 "IFF": exp.If.from_arg_list, 822 "MD5_HEX": exp.MD5.from_arg_list, 823 "MD5_BINARY": exp.MD5Digest.from_arg_list, 824 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 825 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 826 "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True), 827 "LAST_DAY": lambda args: exp.LastDay( 828 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 829 ), 830 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 831 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 832 "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 833 "NULLIFZERO": _build_if_from_nullifzero, 834 "OBJECT_CONSTRUCT": _build_object_construct, 835 "OBJECT_KEYS": exp.JSONKeys.from_arg_list, 836 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 837 "PARSE_URL": lambda args: exp.ParseUrl( 838 this=seq_get(args, 0), permissive=seq_get(args, 1) 839 ), 840 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 841 "REGEXP_REPLACE": _build_regexp_replace, 842 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 843 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 844 "REPLACE": build_replace_with_optional_replacement, 845 "RLIKE": exp.RegexpLike.from_arg_list, 846 "ROUND": _build_round, 847 
"SHA1_BINARY": exp.SHA1Digest.from_arg_list, 848 "SHA1_HEX": exp.SHA.from_arg_list, 849 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 850 "SHA2_HEX": exp.SHA2.from_arg_list, 851 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 852 "STDDEV_SAMP": exp.Stddev.from_arg_list, 853 "STRTOK": _build_strtok, 854 "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True), 855 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 856 "TIMEADD": _build_date_time_add(exp.TimeAdd), 857 "TIMEDIFF": _build_datediff, 858 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 859 "TIMESTAMPDIFF": _build_datediff, 860 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 861 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 862 "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts, 863 "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts, 864 "TRY_DECRYPT": lambda args: exp.Decrypt( 865 this=seq_get(args, 0), 866 passphrase=seq_get(args, 1), 867 aad=seq_get(args, 2), 868 encryption_method=seq_get(args, 3), 869 safe=True, 870 ), 871 "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw( 872 this=seq_get(args, 0), 873 key=seq_get(args, 1), 874 iv=seq_get(args, 2), 875 aad=seq_get(args, 3), 876 encryption_method=seq_get(args, 4), 877 aead=seq_get(args, 5), 878 safe=True, 879 ), 880 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 881 "TRY_TO_BINARY": lambda args: exp.ToBinary( 882 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 883 ), 884 "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True), 885 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 886 **dict.fromkeys( 887 ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number 888 ), 889 "TRY_TO_DOUBLE": lambda args: exp.ToDouble( 890 this=seq_get(args, 0), format=seq_get(args, 1), safe=True 891 ), 892 "TRY_TO_FILE": lambda args: exp.ToFile( 893 
this=seq_get(args, 0), path=seq_get(args, 1), safe=True 894 ), 895 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 896 "TRY_TO_TIMESTAMP": _build_datetime( 897 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 898 ), 899 "TRY_TO_TIMESTAMP_LTZ": _build_datetime( 900 "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True 901 ), 902 "TRY_TO_TIMESTAMP_NTZ": _build_datetime( 903 "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True 904 ), 905 "TRY_TO_TIMESTAMP_TZ": _build_datetime( 906 "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True 907 ), 908 "TO_CHAR": build_timetostr_or_tochar, 909 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 910 **dict.fromkeys( 911 ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"), 912 lambda args: exp.ToNumber( 913 this=seq_get(args, 0), 914 format=seq_get(args, 1), 915 precision=seq_get(args, 2), 916 scale=seq_get(args, 3), 917 ), 918 ), 919 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 920 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 921 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 922 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ), 923 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 924 "TO_VARCHAR": build_timetostr_or_tochar, 925 "TO_JSON": exp.JSONFormat.from_arg_list, 926 "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list, 927 "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list, 928 "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list, 929 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 930 "ZEROIFNULL": _build_if_from_zeroifnull, 931 "LIKE": build_like(exp.Like), 932 "ILIKE": build_like(exp.ILike), 933 "SEARCH": _build_search, 934 "SKEW": exp.Skewness.from_arg_list, 935 "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list, 936 "WEEKISO": 
exp.WeekOfYear.from_arg_list, 937 "WEEKOFYEAR": exp.Week.from_arg_list, 938 } 939 FUNCTIONS.pop("PREDICT") 940 941 FUNCTION_PARSERS = { 942 **parser.Parser.FUNCTION_PARSERS, 943 "DATE_PART": lambda self: self._parse_date_part(), 944 "DIRECTORY": lambda self: self._parse_directory(), 945 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 946 "LISTAGG": lambda self: self._parse_string_agg(), 947 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 948 } 949 FUNCTION_PARSERS.pop("TRIM") 950 951 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 952 953 ALTER_PARSERS = { 954 **parser.Parser.ALTER_PARSERS, 955 "SESSION": lambda self: self._parse_alter_session(), 956 "UNSET": lambda self: self.expression( 957 exp.Set, 958 tag=self._match_text_seq("TAG"), 959 expressions=self._parse_csv(self._parse_id_var), 960 unset=True, 961 ), 962 } 963 964 STATEMENT_PARSERS = { 965 **parser.Parser.STATEMENT_PARSERS, 966 TokenType.GET: lambda self: self._parse_get(), 967 TokenType.PUT: lambda self: self._parse_put(), 968 TokenType.SHOW: lambda self: self._parse_show(), 969 } 970 971 PROPERTY_PARSERS = { 972 **parser.Parser.PROPERTY_PARSERS, 973 "CREDENTIALS": lambda self: self._parse_credentials_property(), 974 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 975 "LOCATION": lambda self: self._parse_location_property(), 976 "TAG": lambda self: self._parse_tag(), 977 "USING": lambda self: self._match_text_seq("TEMPLATE") 978 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 979 } 980 981 TYPE_CONVERTERS = { 982 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 983 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 984 } 985 986 SHOW_PARSERS = { 987 "DATABASES": _show_parser("DATABASES"), 988 "TERSE DATABASES": _show_parser("DATABASES"), 989 "SCHEMAS": _show_parser("SCHEMAS"), 990 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 991 "OBJECTS": _show_parser("OBJECTS"), 992 
"TERSE OBJECTS": _show_parser("OBJECTS"), 993 "TABLES": _show_parser("TABLES"), 994 "TERSE TABLES": _show_parser("TABLES"), 995 "VIEWS": _show_parser("VIEWS"), 996 "TERSE VIEWS": _show_parser("VIEWS"), 997 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 998 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 999 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1000 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 1001 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1002 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 1003 "SEQUENCES": _show_parser("SEQUENCES"), 1004 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 1005 "STAGES": _show_parser("STAGES"), 1006 "COLUMNS": _show_parser("COLUMNS"), 1007 "USERS": _show_parser("USERS"), 1008 "TERSE USERS": _show_parser("USERS"), 1009 "FILE FORMATS": _show_parser("FILE FORMATS"), 1010 "FUNCTIONS": _show_parser("FUNCTIONS"), 1011 "PROCEDURES": _show_parser("PROCEDURES"), 1012 "WAREHOUSES": _show_parser("WAREHOUSES"), 1013 } 1014 1015 CONSTRAINT_PARSERS = { 1016 **parser.Parser.CONSTRAINT_PARSERS, 1017 "WITH": lambda self: self._parse_with_constraint(), 1018 "MASKING": lambda self: self._parse_with_constraint(), 1019 "PROJECTION": lambda self: self._parse_with_constraint(), 1020 "TAG": lambda self: self._parse_with_constraint(), 1021 } 1022 1023 STAGED_FILE_SINGLE_TOKENS = { 1024 TokenType.DOT, 1025 TokenType.MOD, 1026 TokenType.SLASH, 1027 } 1028 1029 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 1030 1031 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 1032 1033 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 1034 1035 LAMBDAS = { 1036 **parser.Parser.LAMBDAS, 1037 TokenType.ARROW: lambda self, expressions: self.expression( 1038 exp.Lambda, 1039 this=self._replace_lambda( 1040 self._parse_assignment(), 1041 expressions, 1042 ), 1043 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 1044 ), 1045 } 1046 1047 
        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            # `<expr> ! <attr>` is parsed into a model attribute access node
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_directory(self) -> exp.DirectoryStage:
            """Parse the argument of DIRECTORY(...), unwrapping a Table to its stage."""
            table = self._parse_table_parts()

            if isinstance(table, exp.Table):
                table = table.this

            return self.expression(exp.DirectoryStage, this=table)

        def _parse_use(self) -> exp.Use:
            """Handle `USE SECONDARY ROLES ALL | NONE | <role> [, ...]`, else defer to base."""
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            """Negate a range predicate, special-casing NOT IN (subquery)."""
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            """Parse a parenthesized, comma-separated TAG property list."""
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            """Parse [WITH] MASKING POLICY / PROJECTION POLICY / TAG column constraints."""
            if self._prev.token_type != TokenType.WITH:
                # WITH is optional before these constraints; back up so the keyword
                # that triggered this parser is re-examined below
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            """WITH TAG (...) is a Snowflake-specific property; defer to base otherwise."""
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr)
            expression = (
                self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise()
            )
            return self.expression(exp.Extract, this=map_date_part(this), expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            """Attach FLATTEN's implicit output columns to a LATERAL FLATTEN(...) alias."""
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            """Parse a table reference, including staged-file references like @stage or 'path'."""
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                # Optional `(FILE_FORMAT => ..., PATTERN => ...)` options after the stage
                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a table, hoisting a nested TableFromRows (TABLE(...)) to the top level."""
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                # Copy the wrapping Table's args (alias, etc.) onto the TableFromRows node
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            """Support Snowflake's IDENTIFIER(<name-or-string>) indirection syntax."""
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            """Parse the remainder of a SHOW statement after its object keyword."""
            scope = None
            scope_kind = None

            # this identifies SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    # Bare `IN <name>`: infer the scope kind from the object being shown
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                terse=terse,
                this=this,
                history=history,
                like=like,
                scope=scope,
                scope_kind=scope_kind,
                starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                limit=self._parse_limit(),
                from_=self._parse_string() if self._match(TokenType.FROM) else None,
                privileges=self._match_text_seq("WITH", "PRIVILEGES")
                and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            """Parse PUT; falls back to a raw Command for unquoted source paths."""
            # NOTE(review): assumes self._curr is not None after PUT — confirm the
            # tokenizer never emits PUT as the final token of a statement
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            """Parse GET as a statement, a GET(...) function call, or a raw Command."""
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            """Consume a stage/location path verbatim into a single Var node."""
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            """Parse a lambda parameter, wrapping an optional trailing type as a Cast."""
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inlineFK, the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # outoflineFK, explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            """Parse FILE_FORMAT = (<options>) or FILE_FORMAT = <format name>."""
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            """Parse SEMANTIC_VIEW(<name> [DIMENSIONS ...] [METRICS ...] [FACTS ...] [WHERE ...])."""
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")):
                    keyword = self._prev.text.lower()
                    kwargs[keyword] = self._parse_csv(self._parse_disjunction)
                elif self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()
                else:
                    self.raise_error("Expecting ) or encountered unexpected keyword")
                    break

            return self.expression(exp.SemanticView, **kwargs)

        def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
            """Parse SET, marking each item as a session VARIABLE assignment."""
            set = super()._parse_set(unset=unset, tag=tag)

            if isinstance(set, exp.Set):
                for expr in set.expressions:
                    if isinstance(expr, exp.SetItem):
                        expr.set("kind", "VARIABLE")
            return set

    class Tokenizer(tokens.Tokenizer):
        # Both backslash and a doubled quote escape inside string literals
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # $$ ... $$ dollar-quoted raw strings
        RAW_STRINGS = ["$$"]
        # Snowflake also supports `//` line comments; block comments do not nest
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            # MINUS is Snowflake's synonym for EXCEPT
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        # Snowflake has no /*+ ... */ hint syntax
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed as a real statement, not an opaque command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        # Expression -> SQL renderers; mostly direct renames to Snowflake function names,
        # with lambdas where argument order or optional args need massaging
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE")
            if e.args.get("sysdate")
            else self.function_fallback_sql(e),
            exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this)
            if e.this
            else "CURRENT_TIMESTAMP",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Decrypt: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT",
                e.this,
                e.args.get("passphrase"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
            ),
            exp.DecryptRaw: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW",
                e.this,
                e.args.get("key"),
                e.args.get("iv"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
                e.args.get("aead"),
            ),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            # ARRAY_GENERATE_RANGE's end bound is exclusive, hence the + 1
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONKeys: rename_func("OBJECT_KEYS"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
            ),
            exp.ToBinary: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
            ),
            exp.ToBoolean: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
            ),
            exp.ToDouble: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
            ),
            exp.ToFile: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
            ),
            exp.ToNumber: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
                e.this,
                e.args.get("format"),
                e.args.get("precision"),
                e.args.get("scale"),
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1_BINARY"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.Skewness: rename_func("SKEW"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimeSlice: lambda self, e: self.func(
                "TIME_SLICE",
                e.this,
                e.expression,
                unit_to_str(e),
                e.args.get("kind"),
            ),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Booland: rename_func("BOOLAND"),
            exp.Boolor: rename_func("BOOLOR"),
            exp.WeekOfYear: rename_func("WEEKISO"),
            exp.YearOfWeek: rename_func("YEAROFWEEK"),
            exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
            exp.ArrayConcatAgg: lambda self, e: self.func(
                "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
            ),
            # SHA2_BINARY requires an explicit digest length; default to 256 bits
            exp.SHA2Digest: lambda self, e: self.func(
                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expression kinds that cannot appear inside a VALUES-as-table clause
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties space-separated and unwrapped (no WITH parentheses)."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            """Fall back to a SELECT/UNION form when VALUES contains unsupported nodes."""
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Render TIMESTAMP_FROM_PARTS, converting a milli arg to the nano slot."""
            milli = expression.args.get("milli")
            if milli is not None:
                # TIMESTAMP_FROM_PARTS has no milliseconds argument; scale up to nanoseconds
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            """Render geo casts as TO_GEOGRAPHY/TO_GEOMETRY; defer to base otherwise."""
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)
    def log_sql(self, expression: exp.Log) -> str:
        """Render LOG; a single-argument LOG becomes the natural log LN."""
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def greatest_sql(self, expression: exp.Greatest) -> str:
        """Render GREATEST, or GREATEST_IGNORE_NULLS when the ignore_nulls arg is set."""
        name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST"
        return self.func(name, expression.this, *expression.expressions)

    def least_sql(self, expression: exp.Least) -> str:
        """Render LEAST, or LEAST_IGNORE_NULLS when the ignore_nulls arg is set."""
        name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST"
        return self.func(name, expression.this, *expression.expressions)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST via Snowflake's FLATTEN table function.

        FLATTEN produces the columns SEQ, KEY, PATH, INDEX, VALUE, THIS, so a
        six-column alias is built to bind the caller's names positionally:
        the first user column aliases VALUE, and an offset arg (if present) is
        popped off the AST to alias INDEX.
        """
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        unnest_alias_columns = unnest_alias.columns if unnest_alias else []
        value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            value,
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        # FLATTEN takes its source as a named argument; add the prefix if absent.
        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        expression_parent = expression.parent

        # Inside LATERAL no TABLE() wrapper is needed; elsewhere FLATTEN must be
        # wrapped as TABLE(FLATTEN(...)) to be used as a table source.
        explode = (
            f"FLATTEN({table_input})"
            if isinstance(expression_parent, exp.Lateral)
            else f"TABLE(FLATTEN({table_input}))"
        )
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        # Outside of FROM/JOIN/LATERAL contexts, project the value column explicitly.
        value = (
            ""
            if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
            else f"{value} FROM "
        )

        return f"{value}{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        """Assemble a SHOW statement from its optional TERSE/HISTORY/LIKE/IN/
        STARTS WITH/LIMIT/FROM/WITH PRIVILEGES clauses."""
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from_")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

    def describe_sql(self, expression: exp.Describe) -> str:
        """Render DESCRIBE <kind> <name> [...], defaulting the kind to TABLE."""
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        """Render an identity column as AUTOINCREMENT with optional START/INCREMENT
        and ORDER/NOORDER (the "order" arg is tri-state: None omits the clause)."""
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""

        order = expression.args.get("order")
        if order is not None:
            order_clause = " ORDER" if order else " NOORDER"
        else:
            order_clause = ""

        return f"AUTOINCREMENT{start}{increment}{order_clause}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        """Render CLUSTER BY with a parenthesized expression list."""
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal as OBJECT_CONSTRUCT, or {*}/{* ILIKE ...} wildcards."""
        if len(expression.expressions) == 1:
            arg = expression.expressions[0]
            if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
                # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
                return f"{{{self.sql(expression.expressions[0])}}}"

        keys = []
        values = []

        # Named fields keep their name as a string key; positional fields get "_<i>".
        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        # OBJECT_CONSTRUCT takes interleaved key/value arguments.
        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        """Render an approximate quantile as APPROX_PERCENTILE (weight/accuracy dropped)."""
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        """Render ALTER ... SET with optional STAGE_FILE_FORMAT/STAGE_COPY_OPTIONS/TAG."""
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        """Render string-to-time as the TO_TIMESTAMP* variant matching the target type,
        with a TRY_ prefix when the "safe" arg is set."""
        # target_type is stored as a DataType instance
        target_type = expression.args.get("target_type")

        # Get the type enum from DataType instance or from type annotation
        if isinstance(target_type, exp.DataType):
            type_enum = target_type.this
        elif expression.type:
            type_enum = expression.type.this
        else:
            type_enum = exp.DataType.Type.TIMESTAMP

        func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")

        return self.func(
            f"{'TRY_' if expression.args.get('safe') else ''}{func_name}",
            expression.this,
            self.format_time(expression),
        )

    def timestampsub_sql(self, expression: exp.TimestampSub) -> str:
        """Render timestamp subtraction as TimestampAdd with a negated amount."""
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        """Render JSON extraction as GET_PATH, parsing string input into JSON first."""
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ so
        # for these cases we PARSE_JSON preemptively
        if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
            "requires_json"
        ):
            this = exp.ParseJSON(this=this)

        return self.func(
            "GET_PATH",
            this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        """Render time formatting as TO_CHAR, casting string input to TIMESTAMP first."""
        this = expression.this
        if this.is_string:
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))

    def datesub_sql(self, expression: exp.DateSub) -> str:
        """Render date subtraction as DATEADD with a negated count (mutates the AST)."""
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        """Render SELECT, adding LIMIT NULL when only an OFFSET is present so the
        bare OFFSET is still valid Snowflake syntax."""
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        """Render the creatable target, relocating COPY GRANTS for materialized views."""
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list
            # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
            # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render ARRAY_AGG, moving an inner ORDER BY into a WITHIN GROUP clause."""
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql

    def array_sql(self, expression: exp.Array) -> str:
        expressions = expression.expressions

first_expr = seq_get(expressions, 0) 2072 if isinstance(first_expr, exp.Select): 2073 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2074 if first_expr.text("kind").upper() == "STRUCT": 2075 object_construct_args = [] 2076 for expr in first_expr.expressions: 2077 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2078 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2079 name = expr.this if isinstance(expr, exp.Alias) else expr 2080 2081 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2082 2083 array_agg = exp.ArrayAgg( 2084 this=_build_object_construct(args=object_construct_args) 2085 ) 2086 2087 first_expr.set("kind", None) 2088 first_expr.set("expressions", [array_agg]) 2089 2090 return self.sql(first_expr.subquery()) 2091 2092 return inline_array_sql(self, expression) 2093 2094 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2095 zone = self.sql(expression, "this") 2096 if not zone: 2097 return super().currentdate_sql(expression) 2098 2099 expr = exp.Cast( 2100 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2101 to=exp.DataType(this=exp.DataType.Type.DATE), 2102 ) 2103 return self.sql(expr) 2104 2105 def dot_sql(self, expression: exp.Dot) -> str: 2106 this = expression.this 2107 2108 if not this.type: 2109 from sqlglot.optimizer.annotate_types import annotate_types 2110 2111 this = annotate_types(this, dialect=self.dialect) 2112 2113 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2114 # Generate colon notation for the top level STRUCT 2115 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2116 2117 return super().dot_sql(expression) 2118 2119 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 2120 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2121 2122 def format_sql(self, expression: exp.Format) -> str: 
2123 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2124 return self.func("TO_CHAR", expression.expressions[0]) 2125 2126 return self.function_fallback_sql(expression) 2127 2128 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2129 # Set part_index to 1 if missing 2130 if not expression.args.get("delimiter"): 2131 expression.set("delimiter", exp.Literal.string(" ")) 2132 2133 if not expression.args.get("part_index"): 2134 expression.set("part_index", exp.Literal.number(1)) 2135 2136 return rename_func("SPLIT_PART")(self, expression) 2137 2138 def uniform_sql(self, expression: exp.Uniform) -> str: 2139 gen = expression.args.get("gen") 2140 seed = expression.args.get("seed") 2141 2142 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2143 if seed: 2144 gen = exp.Rand(this=seed) 2145 2146 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2147 if not gen: 2148 gen = exp.Rand() 2149 2150 return self.func("UNIFORM", expression.this, expression.expression, gen)
class Snowflake(Dialect):
    """Snowflake dialect: settings, time mappings, and parser configuration."""

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    # Snowflake format tokens -> strftime-style tokens
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
        # Seems like Snowflake treats AM/PM in the format string as equivalent,
        # only the time (stamp) value's AM/PM affects the output
        "AM": "%p",
        "am": "%p",
        "PM": "%p",
        "pm": "%p",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }

    PSEUDOCOLUMNS = {"LEVEL"}

    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        return super().can_quote(identifier, identify) and not (
            isinstance(identifier.parent, exp.Table)
            and not identifier.quoted
            and identifier.name.lower() == "dual"
        )

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        # "$" is not a JSON path token in Snowflake paths
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        # Snowflake function name -> AST builder
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.AddMonths(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                preserve_end_of_month=True,
            ),
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "APPROX_TOP_K": _build_approx_top_k,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
            "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
            "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "CORR": lambda args: exp.Corr(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                null_on_zero_variance=True,
            ),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATEFROMPARTS": _build_date_from_parts,
            "DATE_FROM_PARTS": _build_date_from_parts,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True),
            "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
            "DIV0": _build_if_from_div0,
            "DIV0NULL": _build_if_from_div0null,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest(
                this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
            ),
            "LEAST_IGNORE_NULLS": lambda args: exp.Least(
                this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True),
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OBJECT_KEYS": exp.JSONKeys.from_arg_list,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "PARSE_URL": lambda args: exp.ParseUrl(
                this=seq_get(args, 0), permissive=seq_get(args, 1)
            ),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "ROUND": _build_round,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "STDDEV_SAMP": exp.Stddev.from_arg_list,
            "STRTOK": _build_strtok,
            "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_DECRYPT": lambda args: exp.Decrypt(
                this=seq_get(args, 0),
                passphrase=seq_get(args, 1),
                aad=seq_get(args, 2),
                encryption_method=seq_get(args, 3),
                safe=True,
            ),
            "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw(
                this=seq_get(args, 0),
                key=seq_get(args, 1),
                iv=seq_get(args, 2),
                aad=seq_get(args, 3),
                encryption_method=seq_get(args, 4),
                aead=seq_get(args, 5),
                safe=True,
            ),
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_BINARY": lambda args: exp.ToBinary(
                this=seq_get(args, 0), format=seq_get(args, 1), safe=True
            ),
            "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            **dict.fromkeys(
                ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number
            ),
            "TRY_TO_DOUBLE": lambda args: exp.ToDouble(
                this=seq_get(args, 0), format=seq_get(args, 1), safe=True
            ),
            "TRY_TO_FILE": lambda args: exp.ToFile(
                this=seq_get(args, 0), path=seq_get(args, 1), safe=True
            ),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TRY_TO_TIMESTAMP_LTZ": _build_datetime(
                "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True
            ),
            "TRY_TO_TIMESTAMP_NTZ": _build_datetime(
                "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True
            ),
            "TRY_TO_TIMESTAMP_TZ": _build_datetime(
                "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            **dict.fromkeys(
                ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"),
                lambda args: exp.ToNumber(
                    this=seq_get(args, 0),
                    format=seq_get(args, 1),
                    precision=seq_get(args, 2),
                    scale=seq_get(args, 3),
                ),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list,
            "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list,
            "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
            "LIKE": build_like(exp.Like),
            "ILIKE": build_like(exp.ILike),
            "SEARCH": _build_search,
            "SKEW": exp.Skewness.from_arg_list,
            "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
            "WEEKISO": exp.WeekOfYear.from_arg_list,
            "WEEKOFYEAR": exp.Week.from_arg_list,
        }
        FUNCTIONS.pop("PREDICT")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "DIRECTORY": lambda self: self._parse_directory(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        # Fixed column names produced by the FLATTEN table function
        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                # Strip the optional type casts off typed lambda parameters
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }
1048 COLUMN_OPERATORS = { 1049 **parser.Parser.COLUMN_OPERATORS, 1050 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 1051 exp.ModelAttribute, this=this, expression=attr 1052 ), 1053 } 1054 1055 def _parse_directory(self) -> exp.DirectoryStage: 1056 table = self._parse_table_parts() 1057 1058 if isinstance(table, exp.Table): 1059 table = table.this 1060 1061 return self.expression(exp.DirectoryStage, this=table) 1062 1063 def _parse_use(self) -> exp.Use: 1064 if self._match_text_seq("SECONDARY", "ROLES"): 1065 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 1066 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 1067 return self.expression( 1068 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 1069 ) 1070 1071 return super()._parse_use() 1072 1073 def _negate_range( 1074 self, this: t.Optional[exp.Expression] = None 1075 ) -> t.Optional[exp.Expression]: 1076 if not this: 1077 return this 1078 1079 query = this.args.get("query") 1080 if isinstance(this, exp.In) and isinstance(query, exp.Query): 1081 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 1082 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 1083 # which can produce different results (most likely a SnowFlake bug). 
1084 # 1085 # https://docs.snowflake.com/en/sql-reference/functions/in 1086 # Context: https://github.com/tobymao/sqlglot/issues/3890 1087 return self.expression( 1088 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 1089 ) 1090 1091 return self.expression(exp.Not, this=this) 1092 1093 def _parse_tag(self) -> exp.Tags: 1094 return self.expression( 1095 exp.Tags, 1096 expressions=self._parse_wrapped_csv(self._parse_property), 1097 ) 1098 1099 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 1100 if self._prev.token_type != TokenType.WITH: 1101 self._retreat(self._index - 1) 1102 1103 if self._match_text_seq("MASKING", "POLICY"): 1104 policy = self._parse_column() 1105 return self.expression( 1106 exp.MaskingPolicyColumnConstraint, 1107 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1108 expressions=self._match(TokenType.USING) 1109 and self._parse_wrapped_csv(self._parse_id_var), 1110 ) 1111 if self._match_text_seq("PROJECTION", "POLICY"): 1112 policy = self._parse_column() 1113 return self.expression( 1114 exp.ProjectionPolicyColumnConstraint, 1115 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1116 ) 1117 if self._match(TokenType.TAG): 1118 return self._parse_tag() 1119 1120 return None 1121 1122 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1123 if self._match(TokenType.TAG): 1124 return self._parse_tag() 1125 1126 return super()._parse_with_property() 1127 1128 def _parse_create(self) -> exp.Create | exp.Command: 1129 expression = super()._parse_create() 1130 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 1131 # Replace the Table node with the enclosed Identifier 1132 expression.this.replace(expression.this.this) 1133 1134 return expression 1135 1136 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 1137 # 
https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 1138 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 1139 this = self._parse_var() or self._parse_type() 1140 1141 if not this: 1142 return None 1143 1144 # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr) 1145 expression = ( 1146 self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise() 1147 ) 1148 return self.expression(exp.Extract, this=map_date_part(this), expression=expression) 1149 1150 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1151 if is_map: 1152 # Keys are strings in Snowflake's objects, see also: 1153 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1154 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1155 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1156 1157 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1158 1159 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1160 lateral = super()._parse_lateral() 1161 if not lateral: 1162 return lateral 1163 1164 if isinstance(lateral.this, exp.Explode): 1165 table_alias = lateral.args.get("alias") 1166 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1167 if table_alias and not table_alias.args.get("columns"): 1168 table_alias.set("columns", columns) 1169 elif not table_alias: 1170 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1171 1172 return lateral 1173 1174 def _parse_table_parts( 1175 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1176 ) -> exp.Table: 1177 # https://docs.snowflake.com/en/user-guide/querying-stage 1178 if self._match(TokenType.STRING, advance=False): 1179 table = self._parse_string() 1180 elif self._match_text_seq("@", advance=False): 1181 table = self._parse_location_path() 1182 else: 1183 
table = None 1184 1185 if table: 1186 file_format = None 1187 pattern = None 1188 1189 wrapped = self._match(TokenType.L_PAREN) 1190 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1191 if self._match_text_seq("FILE_FORMAT", "=>"): 1192 file_format = self._parse_string() or super()._parse_table_parts( 1193 is_db_reference=is_db_reference 1194 ) 1195 elif self._match_text_seq("PATTERN", "=>"): 1196 pattern = self._parse_string() 1197 else: 1198 break 1199 1200 self._match(TokenType.COMMA) 1201 1202 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1203 else: 1204 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1205 1206 return table 1207 1208 def _parse_table( 1209 self, 1210 schema: bool = False, 1211 joins: bool = False, 1212 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1213 parse_bracket: bool = False, 1214 is_db_reference: bool = False, 1215 parse_partition: bool = False, 1216 consume_pipe: bool = False, 1217 ) -> t.Optional[exp.Expression]: 1218 table = super()._parse_table( 1219 schema=schema, 1220 joins=joins, 1221 alias_tokens=alias_tokens, 1222 parse_bracket=parse_bracket, 1223 is_db_reference=is_db_reference, 1224 parse_partition=parse_partition, 1225 ) 1226 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1227 table_from_rows = table.this 1228 for arg in exp.TableFromRows.arg_types: 1229 if arg != "this": 1230 table_from_rows.set(arg, table.args.get(arg)) 1231 1232 table = table_from_rows 1233 1234 return table 1235 1236 def _parse_id_var( 1237 self, 1238 any_token: bool = True, 1239 tokens: t.Optional[t.Collection[TokenType]] = None, 1240 ) -> t.Optional[exp.Expression]: 1241 if self._match_text_seq("IDENTIFIER", "("): 1242 identifier = ( 1243 super()._parse_id_var(any_token=any_token, tokens=tokens) 1244 or self._parse_string() 1245 ) 1246 self._match_r_paren() 1247 return self.expression(exp.Anonymous, this="IDENTIFIER", 
expressions=[identifier]) 1248 1249 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1250 1251 def _parse_show_snowflake(self, this: str) -> exp.Show: 1252 scope = None 1253 scope_kind = None 1254 1255 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1256 # which is syntactically valid but has no effect on the output 1257 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1258 1259 history = self._match_text_seq("HISTORY") 1260 1261 like = self._parse_string() if self._match(TokenType.LIKE) else None 1262 1263 if self._match(TokenType.IN): 1264 if self._match_text_seq("ACCOUNT"): 1265 scope_kind = "ACCOUNT" 1266 elif self._match_text_seq("CLASS"): 1267 scope_kind = "CLASS" 1268 scope = self._parse_table_parts() 1269 elif self._match_text_seq("APPLICATION"): 1270 scope_kind = "APPLICATION" 1271 if self._match_text_seq("PACKAGE"): 1272 scope_kind += " PACKAGE" 1273 scope = self._parse_table_parts() 1274 elif self._match_set(self.DB_CREATABLES): 1275 scope_kind = self._prev.text.upper() 1276 if self._curr: 1277 scope = self._parse_table_parts() 1278 elif self._curr: 1279 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1280 scope = self._parse_table_parts() 1281 1282 return self.expression( 1283 exp.Show, 1284 terse=terse, 1285 this=this, 1286 history=history, 1287 like=like, 1288 scope=scope, 1289 scope_kind=scope_kind, 1290 starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1291 limit=self._parse_limit(), 1292 from_=self._parse_string() if self._match(TokenType.FROM) else None, 1293 privileges=self._match_text_seq("WITH", "PRIVILEGES") 1294 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1295 ) 1296 1297 def _parse_put(self) -> exp.Put | exp.Command: 1298 if self._curr.token_type != TokenType.STRING: 1299 return self._parse_as_command(self._prev) 1300 1301 return self.expression( 1302 exp.Put, 1303 this=self._parse_string(), 1304 
target=self._parse_location_path(), 1305 properties=self._parse_properties(), 1306 ) 1307 1308 def _parse_get(self) -> t.Optional[exp.Expression]: 1309 start = self._prev 1310 1311 # If we detect GET( then we need to parse a function, not a statement 1312 if self._match(TokenType.L_PAREN): 1313 self._retreat(self._index - 2) 1314 return self._parse_expression() 1315 1316 target = self._parse_location_path() 1317 1318 # Parse as command if unquoted file path 1319 if self._curr.token_type == TokenType.URI_START: 1320 return self._parse_as_command(start) 1321 1322 return self.expression( 1323 exp.Get, 1324 this=self._parse_string(), 1325 target=target, 1326 properties=self._parse_properties(), 1327 ) 1328 1329 def _parse_location_property(self) -> exp.LocationProperty: 1330 self._match(TokenType.EQ) 1331 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1332 1333 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1334 # Parse either a subquery or a staged file 1335 return ( 1336 self._parse_select(table=True, parse_subquery_alias=False) 1337 if self._match(TokenType.L_PAREN, advance=False) 1338 else self._parse_table_parts() 1339 ) 1340 1341 def _parse_location_path(self) -> exp.Var: 1342 start = self._curr 1343 self._advance_any(ignore_reserved=True) 1344 1345 # We avoid consuming a comma token because external tables like @foo and @bar 1346 # can be joined in a query with a comma separator, as well as closing paren 1347 # in case of subqueries 1348 while self._is_connected() and not self._match_set( 1349 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1350 ): 1351 self._advance_any(ignore_reserved=True) 1352 1353 return exp.var(self._find_sql(start, self._prev)) 1354 1355 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1356 this = super()._parse_lambda_arg() 1357 1358 if not this: 1359 return this 1360 1361 typ = self._parse_types() 1362 1363 if typ: 1364 return self.expression(exp.Cast, 
                                   this=this, to=typ)

        return this

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a foreign-key constraint, supporting Snowflake's inline form."""
        # inline FK: the REFERENCES columns are implied
        if self._match(TokenType.REFERENCES, advance=False):
            return self.expression(exp.ForeignKey)

        # out-of-line FK: explicitly names the columns
        return super()._parse_foreign_key()

    def _parse_file_format_property(self) -> exp.FileFormatProperty:
        """Parse `FILE_FORMAT [=] (<options>)` or `FILE_FORMAT [=] <name>`."""
        self._match(TokenType.EQ)
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_options()
        else:
            expressions = [self._parse_format_name()]

        return self.expression(
            exp.FileFormatProperty,
            expressions=expressions,
        )

    def _parse_credentials_property(self) -> exp.CredentialsProperty:
        """Parse `CREDENTIALS = (<key> = <value> ...)`."""
        return self.expression(
            exp.CredentialsProperty,
            expressions=self._parse_wrapped_options(),
        )

    def _parse_semantic_view(self) -> exp.SemanticView:
        """Parse SEMANTIC_VIEW(<name> [DIMENSIONS ...] [METRICS ...] [FACTS ...] [WHERE ...])."""
        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")):
                # The matched keyword doubles as the kwarg name on exp.SemanticView
                keyword = self._prev.text.lower()
                kwargs[keyword] = self._parse_csv(self._parse_disjunction)
            elif self._match_text_seq("WHERE"):
                kwargs["where"] = self._parse_expression()
            else:
                self.raise_error("Expecting ) or encountered unexpected keyword")
                break

        return self.expression(exp.SemanticView, **kwargs)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET, marking each item as a session VARIABLE assignment."""
        set = super()._parse_set(unset=unset, tag=tag)

        if isinstance(set, exp.Set):
            for expr in set.expressions:
                if isinstance(expr, exp.SetItem):
                    expr.set("kind", "VARIABLE")
        return set

    class Tokenizer(tokens.Tokenizer):
        # Both backslash and a doubled quote escape inside Snowflake strings
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # $$ ... $$ dollar-quoted strings are taken verbatim
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        # Snowflake has no /*+ ... */ hint comments
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed structurally (see _parse_show_snowflake), not as a raw command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        # Dialect capability flags consumed by the base Generator
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        # AST node -> SQL generation overrides for Snowflake. Most entries are
        # simple renames; lambdas handle argument reordering or optional args.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                # ARRAY_CONTAINS expects a VARIANT needle unless explicitly disabled
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE")
            if e.args.get("sysdate")
            else self.function_fallback_sql(e),
            exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this)
            if e.this
            else "CURRENT_TIMESTAMP",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Decrypt: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT",
                e.this,
                e.args.get("passphrase"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
            ),
            exp.DecryptRaw: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW",
                e.this,
                e.args.get("key"),
                e.args.get("iv"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
                e.args.get("aead"),
            ),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONKeys: rename_func("OBJECT_KEYS"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
            ),
            exp.ToBinary: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
            ),
            exp.ToBoolean: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
            ),
            exp.ToDouble: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
            ),
            exp.ToFile: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
            ),
            exp.ToNumber: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
                e.this,
                e.args.get("format"),
                e.args.get("precision"),
                e.args.get("scale"),
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1_BINARY"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.Skewness: rename_func("SKEW"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimeSlice: lambda self, e: self.func(
                "TIME_SLICE",
                e.this,
                e.expression,
                unit_to_str(e),
                e.args.get("kind"),
            ),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Booland: rename_func("BOOLAND"),
            exp.Boolor: rename_func("BOOLOR"),
            exp.WeekOfYear: rename_func("WEEKISO"),
            exp.YearOfWeek: rename_func("YEAROFWEEK"),
            exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
            exp.ArrayConcatAgg: lambda self, e: self.func(
                "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
            ),
            exp.SHA2Digest: lambda self, e: self.func(
                # SHA2's digest length defaults to 256 bits when unspecified
                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
            ),
        }

        # JSON path components Snowflake can express natively
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        # Where each property is rendered relative to the CREATE statement
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Node types that cannot appear inside a VALUES clause rendered as a table
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties space-separated and unwrapped (no WITH parens)."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            """Fall back to SELECT/UNION form when VALUES contains unsupported nodes."""
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type, special-casing VECTOR elements and OBJECT schemas."""
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            """Render TO_NUMBER with its optional format/precision/scale arguments."""
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Render TIMESTAMP_FROM_PARTS, converting milliseconds to nanoseconds."""
            milli = expression.args.get("milli")
            if milli is not None:
                # Snowflake only takes nanoseconds: 1 ms = 1_000_000 ns
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            """Render casts; geospatial casts use TO_GEOGRAPHY/TO_GEOMETRY functions."""
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Render TRY_CAST only when the value is provably a string; else CAST."""
            value = expression.this

            if value.type is None:
                # Local import avoids a circular dependency with the optimizer
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)
        def log_sql(self, expression: exp.Log) -> str:
            """Render LOG; a single-argument LOG is Snowflake's natural log LN."""
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def greatest_sql(self, expression: exp.Greatest) -> str:
            """Render GREATEST, or GREATEST_IGNORE_NULLS when nulls are ignored."""
            name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST"
            return self.func(name, expression.this, *expression.expressions)

        def least_sql(self, expression: exp.Least) -> str:
            """Render LEAST, or LEAST_IGNORE_NULLS when nulls are ignored."""
            name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST"
            return self.func(name, expression.this, *expression.expressions)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as Snowflake's (TABLE(FLATTEN(...))) construct.

            FLATTEN emits fixed columns (seq, key, path, index, value, this), so the
            user alias columns are mapped onto that layout.
            """
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                # WITH OFFSET maps onto FLATTEN's "index" column
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            # LATERAL FLATTEN(...) doesn't need the TABLE() wrapper
            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            # In a projection context, select the value column out of the flatten
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render a SHOW statement with all of its optional clauses."""
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from_")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render DESCRIBE <kind> <name>, defaulting the kind to TABLE."""
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render identity columns as AUTOINCREMENT [START n] [INCREMENT n] [NOORDER|ORDER]."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""

            order = expression.args.get("order")
            if order is not None:
                # Tri-state: True -> ORDER, False -> NOORDER, None -> omit entirely
                order_clause = " ORDER" if order else " NOORDER"
            else:
                order_clause = ""

            return f"AUTOINCREMENT{start}{increment}{order_clause}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            """Render a clustering key as CLUSTER BY (<exprs>)."""
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct literal via OBJECT_CONSTRUCT (or {*} wildcard syntax)."""
            if len(expression.expressions) == 1:
                arg = expression.expressions[0]
                if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
                    # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
                    return f"{{{self.sql(expression.expressions[0])}}}"

            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    # Unnamed fields get positional keys: _0, _1, ...
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            """Render approximate quantiles as APPROX_PERCENTILE."""
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render ALTER ... SET with stage file format/copy options and tags."""
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            """Render StrToTime as the TO_TIMESTAMP* variant matching the target type."""
            # target_type is stored as a DataType instance
            target_type = expression.args.get("target_type")

            # Get the type enum from DataType instance or from type annotation
            if isinstance(target_type, exp.DataType):
                type_enum = target_type.this
            elif expression.type:
                type_enum = expression.type.this
            else:
                type_enum = exp.DataType.Type.TIMESTAMP

            func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")

            return self.func(
                f"{'TRY_' if expression.args.get('safe') else ''}{func_name}",
                expression.this,
                self.format_time(expression),
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            """Render timestamp subtraction as TIMESTAMPADD with a negated amount."""
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            """Render JSON extraction as GET_PATH, parsing JSON strings first."""
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ so
            # for these cases we PARSE_JSON preemptively
            if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
                "requires_json"
            ):
                this = exp.ParseJSON(this=this)

            return self.func(
                "GET_PATH",
                this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            """Render TimeToStr as TO_CHAR, casting string inputs to TIMESTAMP first."""
            this = expression.this
            if this.is_string:
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            """Render date subtraction as DATEADD with a negated amount."""
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            """Render SELECT, adding LIMIT NULL when only OFFSET is present
            (Snowflake rejects a bare OFFSET without a LIMIT)."""
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            """Render the createable part, relocating COPY GRANTS for materialized views."""
            is_materialized = expression.find(exp.MaterializedProperty)
            copy_grants_property = expression.find(exp.CopyGrantsProperty)

            if expression.kind == "VIEW" and is_materialized and copy_grants_property:
                # For materialized views, COPY GRANTS is located *before* the columns list
                # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
                # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
                post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
                post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

                this_name = self.sql(expression.this, "this")
                copy_grants = self.sql(copy_grants_property)
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

            return super().createable_sql(expression, locations)

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ARRAY_AGG, moving an ORDER BY into a WITHIN GROUP clause."""
            this = expression.this

            # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
            # and add it later as part of the WITHIN GROUP clause
            order = this if isinstance(this, exp.Order) else None
            if order:
                expression.set("this", order.this.pop())

            expr_sql = super().arrayagg_sql(expression)

            if order:
                expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

            return expr_sql

        def array_sql(self, expression: exp.Array) -> str:
            expressions = expression.expressions
first_expr = seq_get(expressions, 0) 2073 if isinstance(first_expr, exp.Select): 2074 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2075 if first_expr.text("kind").upper() == "STRUCT": 2076 object_construct_args = [] 2077 for expr in first_expr.expressions: 2078 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2079 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2080 name = expr.this if isinstance(expr, exp.Alias) else expr 2081 2082 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2083 2084 array_agg = exp.ArrayAgg( 2085 this=_build_object_construct(args=object_construct_args) 2086 ) 2087 2088 first_expr.set("kind", None) 2089 first_expr.set("expressions", [array_agg]) 2090 2091 return self.sql(first_expr.subquery()) 2092 2093 return inline_array_sql(self, expression) 2094 2095 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2096 zone = self.sql(expression, "this") 2097 if not zone: 2098 return super().currentdate_sql(expression) 2099 2100 expr = exp.Cast( 2101 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2102 to=exp.DataType(this=exp.DataType.Type.DATE), 2103 ) 2104 return self.sql(expr) 2105 2106 def dot_sql(self, expression: exp.Dot) -> str: 2107 this = expression.this 2108 2109 if not this.type: 2110 from sqlglot.optimizer.annotate_types import annotate_types 2111 2112 this = annotate_types(this, dialect=self.dialect) 2113 2114 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2115 # Generate colon notation for the top level STRUCT 2116 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2117 2118 return super().dot_sql(expression) 2119 2120 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 2121 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2122 2123 def format_sql(self, expression: exp.Format) -> str: 
2124 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2125 return self.func("TO_CHAR", expression.expressions[0]) 2126 2127 return self.function_fallback_sql(expression) 2128 2129 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2130 # Set part_index to 1 if missing 2131 if not expression.args.get("delimiter"): 2132 expression.set("delimiter", exp.Literal.string(" ")) 2133 2134 if not expression.args.get("part_index"): 2135 expression.set("part_index", exp.Literal.number(1)) 2136 2137 return rename_func("SPLIT_PART")(self, expression) 2138 2139 def uniform_sql(self, expression: exp.Uniform) -> str: 2140 gen = expression.args.get("gen") 2141 seed = expression.args.get("seed") 2142 2143 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2144 if seed: 2145 gen = exp.Rand(this=seed) 2146 2147 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2148 if not gen: 2149 gen = exp.Rand() 2150 2151 return self.func("UNIFORM", expression.this, expression.expression, gen)
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Whether alias references are allowed in JOIN ... ON clauses.
Most dialects do not support this, but Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
For example, in Snowflake: SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id -- VALID
Reference: https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
713 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 714 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 715 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 716 return super().can_quote(identifier, identify) and not ( 717 isinstance(identifier.parent, exp.Table) 718 and not identifier.quoted 719 and identifier.name.lower() == "dual" 720 )
Checks if an identifier can be quoted
Arguments:
- identifier: The identifier to check.
- identify:
True: Always returns `True`, except for certain cases. "safe": Only returns `True` if the identifier is case-insensitive. "unsafe": Only returns `True` if the identifier is case-sensitive.
Returns:
Whether the given text can be identified.
Mapping of an escaped sequence (e.g. `\n`) to its unescaped version (e.g. a newline character).
722 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 723 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 724 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Snowflake-specific parser configuration and helpers."""

    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_VARIANT_EXTRACT = True
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

    # Snowflake allows these keywords to be used as identifiers
    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.EXCEPT,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

    # Snowflake function name -> AST builder overrides
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ADD_MONTHS": lambda args: exp.AddMonths(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            preserve_end_of_month=True,
        ),
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "APPROX_TOP_K": _build_approx_top_k,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
        "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
        "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
        "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
        "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
        "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
        "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
        "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
        "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
        "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
        "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
        "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
        "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
        "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
        "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
        "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
        "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
        "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
        "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list,
        "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
        "CORR": lambda args: exp.Corr(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            null_on_zero_variance=True,
        ),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATEFROMPARTS": _build_date_from_parts,
        "DATE_FROM_PARTS": _build_date_from_parts,
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True),
        "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
        "DIV0": _build_if_from_div0,
        "DIV0NULL": _build_if_from_div0null,
        "EDITDISTANCE": lambda args: exp.Levenshtein(
            this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
        ),
        "FLATTEN": exp.Explode.from_arg_list,
        "GET": exp.GetExtract.from_arg_list,
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0),
            expression=dialect.to_json_path(seq_get(args, 1)),
            requires_json=True,
        ),
        "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest(
            this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
        ),
        "LEAST_IGNORE_NULLS": lambda args: exp.Least(
            this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True
        ),
        "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
        "IFF": exp.If.from_arg_list,
        "MD5_HEX": exp.MD5.from_arg_list,
        "MD5_BINARY": exp.MD5Digest.from_arg_list,
        "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
        "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
        "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True),
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "OBJECT_KEYS": exp.JSONKeys.from_arg_list,
        "OCTET_LENGTH": exp.ByteLength.from_arg_list,
        "PARSE_URL": lambda args: exp.ParseUrl(
            this=seq_get(args, 0), permissive=seq_get(args, 1)
        ),
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REPLACE": build_replace_with_optional_replacement,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "ROUND": _build_round,
        "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
        "SHA1_HEX": exp.SHA.from_arg_list,
        "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
        "SHA2_HEX": exp.SHA2.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "STDDEV_SAMP": exp.Stddev.from_arg_list,
        "STRTOK": _build_strtok,
        "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True),
        "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts,
        "TRY_DECRYPT": lambda args: exp.Decrypt(
            this=seq_get(args, 0),
            passphrase=seq_get(args, 1),
            aad=seq_get(args, 2),
            encryption_method=seq_get(args, 3),
            safe=True,
        ),
        "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw(
            this=seq_get(args, 0),
            key=seq_get(args, 1),
            iv=seq_get(args, 2),
            aad=seq_get(args, 3),
            encryption_method=seq_get(args, 4),
            aead=seq_get(args, 5),
            safe=True,
        ),
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_BINARY": lambda args: exp.ToBinary(
            this=seq_get(args, 0), format=seq_get(args, 1), safe=True
        ),
        "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        **dict.fromkeys(
            ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number
        ),
        "TRY_TO_DOUBLE": lambda args: exp.ToDouble(
            this=seq_get(args, 0), format=seq_get(args, 1), safe=True
        ),
        "TRY_TO_FILE": lambda args: exp.ToFile(
            this=seq_get(args, 0), path=seq_get(args, 1), safe=True
        ),
        "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
        "TRY_TO_TIMESTAMP": _build_datetime(
            "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
        ),
        "TRY_TO_TIMESTAMP_LTZ": _build_datetime(
            "TRY_TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ, safe=True
        ),
        "TRY_TO_TIMESTAMP_NTZ": _build_datetime(
            "TRY_TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ, safe=True
        ),
        "TRY_TO_TIMESTAMP_TZ": _build_datetime(
            "TRY_TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ, safe=True
        ),
        "TO_CHAR": build_timetostr_or_tochar,
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        **dict.fromkeys(
            ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"),
            lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMPNTZ),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": build_timetostr_or_tochar,
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list,
        "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list,
        "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list,
        "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
        "LIKE": build_like(exp.Like),
        "ILIKE": build_like(exp.ILike),
        "SEARCH": _build_search,
        "SKEW": exp.Skewness.from_arg_list,
        "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
        "WEEKISO": exp.WeekOfYear.from_arg_list,
        "WEEKOFYEAR": exp.Week.from_arg_list,
    }
    # Snowflake has no PREDICT function
    FUNCTIONS.pop("PREDICT")

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "DIRECTORY": lambda self: self._parse_directory(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        "LISTAGG": lambda self: self._parse_string_agg(),
        "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    # TIME is a standalone type in Snowflake, not part of the timestamp family
    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "SESSION": lambda self: self._parse_alter_session(),
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.GET: lambda self: self._parse_get(),
        TokenType.PUT: lambda self: self._parse_put(),
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "CREDENTIALS": lambda self: self._parse_credentials_property(),
        "FILE_FORMAT": lambda self: self._parse_file_format_property(),
        "LOCATION": lambda self: self._parse_location_property(),
        "TAG": lambda self: self._parse_tag(),
        "USING": lambda self: self._match_text_seq("TEMPLATE")
        and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    # SHOW <object-kind> statement parsers; TERSE variants share the same parser
    SHOW_PARSERS = {
        "DATABASES": _show_parser("DATABASES"),
        "TERSE DATABASES": _show_parser("DATABASES"),
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "STAGES": _show_parser("STAGES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
        "FILE FORMATS": _show_parser("FILE FORMATS"),
        "FUNCTIONS": _show_parser("FUNCTIONS"),
        "PROCEDURES": _show_parser("PROCEDURES"),
        "WAREHOUSES": _show_parser("WAREHOUSES"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    # Tokens allowed as single characters inside staged file references (@stage/path)
    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    # Output columns produced by FLATTEN, in positional order
    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            # Strip parameter-type casts from the lambda's parameter list
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    COLUMN_OPERATORS = {
        **parser.Parser.COLUMN_OPERATORS,
        # "!" accesses a model attribute, e.g. model!PREDICT(...)
        TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
            exp.ModelAttribute, this=this, expression=attr
        ),
    }

    def _parse_directory(self) -> exp.DirectoryStage:
        """Parse the argument of DIRECTORY(...) into a DirectoryStage node."""
        table = self._parse_table_parts()

        # Unwrap a Table node down to the underlying stage reference
        if isinstance(table, exp.Table):
            table = table.this

        return self.expression(exp.DirectoryStage, this=table)
        def _parse_use(self) -> exp.Use:
            """Parse USE, with special handling for USE SECONDARY ROLES ALL|NONE|<roles>."""
            if self._match_text_seq("SECONDARY", "ROLES"):
                # ALL/NONE become a Var in `this`; otherwise a CSV of role names
                # is stored in `expressions`.
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            """Negate a range predicate, preserving Snowflake's NOT IN semantics."""
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a SnowFlake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            """Parse a parenthesized, comma-separated TAG property list."""
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            """Parse WITH MASKING POLICY / PROJECTION POLICY / TAG column constraints.

            Returns None when none of the recognized constraint forms follows.
            """
            # The WITH keyword is optional; if we got here via MASKING/PROJECTION/TAG
            # directly, step back so the keyword itself can be re-matched below.
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    # Policies are dotted names, not columns; normalize Column -> Dot
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    # Policies are dotted names, not columns; normalize Column -> Dot
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            """Parse WITH-style properties, adding support for WITH TAG (...)."""
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            """Parse CREATE, unwrapping the target for non-table creatables."""
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            """Parse DATE_PART into an Extract, canonicalizing the date part name."""
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr)
            expression = (
                self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise()
            )
            return self.expression(exp.Extract, this=map_date_part(this), expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            """Parse one key/value inside bracket constructors (e.g. OBJECT literals)."""
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral =
 super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                # LATERAL FLATTEN(...) produces a fixed set of columns; attach them
                # to the (possibly synthesized) table alias so they can be selected.
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            """Parse a table reference, including staged file locations.

            https://docs.snowflake.com/en/user-guide/querying-stage
            """
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                # Optionally consume (FILE_FORMAT => ..., PATTERN => ...) options.
                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        # The format may be a quoted string or a (possibly qualified) name
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
                # NOTE(review): `consume_pipe` is accepted by this override but not
                # forwarded to the base parser here — confirm this is intentional.
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                # Hoist a wrapped TableFromRows, copying over the Table-level args
                # (alias, hints, etc.) so nothing is lost by the unwrap.
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            """Parse an identifier, supporting Snowflake's IDENTIFIER(...) indirection."""
            if self._match_text_seq("IDENTIFIER", "("):
                # IDENTIFIER('name') / IDENTIFIER(name) is kept as an Anonymous call
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            """Parse the remainder of a SHOW <kind> statement into an exp.Show."""
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    # Bare `IN <name>`: infer whether the scope is a schema or table
                    scope_kind
= "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1280 scope = self._parse_table_parts() 1281 1282 return self.expression( 1283 exp.Show, 1284 terse=terse, 1285 this=this, 1286 history=history, 1287 like=like, 1288 scope=scope, 1289 scope_kind=scope_kind, 1290 starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1291 limit=self._parse_limit(), 1292 from_=self._parse_string() if self._match(TokenType.FROM) else None, 1293 privileges=self._match_text_seq("WITH", "PRIVILEGES") 1294 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1295 ) 1296 1297 def _parse_put(self) -> exp.Put | exp.Command: 1298 if self._curr.token_type != TokenType.STRING: 1299 return self._parse_as_command(self._prev) 1300 1301 return self.expression( 1302 exp.Put, 1303 this=self._parse_string(), 1304 target=self._parse_location_path(), 1305 properties=self._parse_properties(), 1306 ) 1307 1308 def _parse_get(self) -> t.Optional[exp.Expression]: 1309 start = self._prev 1310 1311 # If we detect GET( then we need to parse a function, not a statement 1312 if self._match(TokenType.L_PAREN): 1313 self._retreat(self._index - 2) 1314 return self._parse_expression() 1315 1316 target = self._parse_location_path() 1317 1318 # Parse as command if unquoted file path 1319 if self._curr.token_type == TokenType.URI_START: 1320 return self._parse_as_command(start) 1321 1322 return self.expression( 1323 exp.Get, 1324 this=self._parse_string(), 1325 target=target, 1326 properties=self._parse_properties(), 1327 ) 1328 1329 def _parse_location_property(self) -> exp.LocationProperty: 1330 self._match(TokenType.EQ) 1331 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1332 1333 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1334 # Parse either a subquery or a staged file 1335 return ( 1336 self._parse_select(table=True, parse_subquery_alias=False) 1337 if self._match(TokenType.L_PAREN, advance=False) 1338 else 
 self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            """Consume a stage/file path verbatim and return it as a Var."""
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            """Parse a lambda parameter, wrapping it in a Cast if a type follows."""
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # Inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # Out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            """Parse FILE_FORMAT [=] (<options>) or FILE_FORMAT [=] <name>."""
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            """Parse CREDENTIALS = (<key> = <value> ...)."""
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            """Parse SEMANTIC_VIEW(<name> [DIMENSIONS ...] [METRICS ...] [FACTS ...] [WHERE ...])."""
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if
self._match_texts(("DIMENSIONS", "METRICS", "FACTS")): 1399 keyword = self._prev.text.lower() 1400 kwargs[keyword] = self._parse_csv(self._parse_disjunction) 1401 elif self._match_text_seq("WHERE"): 1402 kwargs["where"] = self._parse_expression() 1403 else: 1404 self.raise_error("Expecting ) or encountered unexpected keyword") 1405 break 1406 1407 return self.expression(exp.SemanticView, **kwargs) 1408 1409 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 1410 set = super()._parse_set(unset=unset, tag=tag) 1411 1412 if isinstance(set, exp.Set): 1413 for expr in set.expressions: 1414 if isinstance(expr, exp.SetItem): 1415 expr.set("kind", "VARIABLE") 1416 return set
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
    class Tokenizer(tokens.Tokenizer):
        """Snowflake-specific tokenizer settings."""

        # Both backslash and a repeated quote escape inside string literals.
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # $$ ... $$ delimits raw (dollar-quoted) strings.
        RAW_STRINGS = ["$$"]
        # Snowflake additionally supports `//` line comments.
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            # MINUS is Snowflake's synonym for EXCEPT
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        # `/*+` is not tokenized as a hint opener in Snowflake.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        # `$` may also begin an identifier-like variable name.
        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed properly (see Parser) rather than as a raw command.
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Generator(generator.Generator):
        """Snowflake-specific SQL generator settings and transforms."""

        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        # Snowflake spells SELECT * EXCEPT as EXCLUDE
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        # Mapping from sqlglot expression nodes to Snowflake SQL renderers.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            # ARRAY_CONTAINS expects a VARIANT needle unless ensure_variant is disabled
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.CurrentTimestamp: lambda self, e: self.func("SYSDATE")
            if e.args.get("sysdate")
            else self.function_fallback_sql(e),
            exp.Localtimestamp: lambda self, e: self.func("CURRENT_TIMESTAMP", e.this)
            if e.this
            else "CURRENT_TIMESTAMP",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Decrypt: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT",
                e.this,
                e.args.get("passphrase"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
            ),
            exp.DecryptRaw: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW",
                e.this,
                e.args.get("key"),
                e.args.get("iv"),
                e.args.get("aad"),
                e.args.get("encryption_method"),
                e.args.get("aead"),
            ),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            # ARRAY_GENERATE_RANGE's upper bound is exclusive, hence end + 1
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONKeys: rename_func("OBJECT_KEYS"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
            ),
            exp.ToBinary: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
            ),
            exp.ToBoolean: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
            ),
            exp.ToDouble: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
            ),
            exp.ToFile: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
            ),
            exp.ToNumber: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
                e.this,
                e.args.get("format"),
                e.args.get("precision"),
                e.args.get("scale"),
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1_BINARY"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.Skewness: rename_func("SKEW"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimeSlice: lambda self, e: self.func(
                "TIME_SLICE",
                e.this,
                e.expression,
                unit_to_str(e),
                e.args.get("kind"),
            ),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, e.args.get("scale")),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Booland: rename_func("BOOLAND"),
            exp.Boolor: rename_func("BOOLOR"),
            exp.WeekOfYear: rename_func("WEEKISO"),
            exp.YearOfWeek: rename_func("YEAROFWEEK"),
            exp.YearOfWeekIso: rename_func("YEAROFWEEKISO"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
            exp.ArrayConcatAgg: lambda self, e: self.func(
                "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
            ),
            # SHA2_BINARY's digest length defaults to 256 when unspecified
            exp.SHA2Digest: lambda self, e: self.func(
                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
            ),
        }

        # JSON path components Snowflake can express natively.
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expressions that cannot appear inside a VALUES-as-table clause;
        # their presence forces values_sql to fall back to plain VALUES.
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties unwrapped and space-separated (Snowflake style)."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            """Render VALUES, disabling the table form when unsupported nodes occur."""
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type, special-casing VECTOR elements and OBJECT schemas."""
            # Check if this is a FLOAT type nested inside a VECTOR type
            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if expression.is_type(exp.DataType.Type.DOUBLE):
                parent = expression.parent
                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
                    return "FLOAT"

            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                    if
 isinstance(field_type, exp.DataType):
                        # A bare type (no key) cannot be expressed; emit an untyped OBJECT
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            """Render TO_NUMBER with its optional format, precision and scale."""
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Render TIMESTAMP_FROM_PARTS, converting milliseconds to nanoseconds."""
            milli = expression.args.get("milli")
            if milli is not None:
                # TIMESTAMP_FROM_PARTS takes nanoseconds; 1 ms = 1_000_000 ns
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            """Render CAST, using TO_GEOGRAPHY/TO_GEOMETRY for spatial targets."""
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Render TRY_CAST, degrading to CAST when the value may not be a string."""
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a
 string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            """Render LOG; a single-argument LOG is the natural log (LN)."""
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def greatest_sql(self, expression: exp.Greatest) -> str:
            """Render GREATEST, or GREATEST_IGNORE_NULLS when nulls are ignored."""
            name = "GREATEST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "GREATEST"
            return self.func(name, expression.this, *expression.expressions)

        def least_sql(self, expression: exp.Least) -> str:
            """Render LEAST, or LEAST_IGNORE_NULLS when nulls are ignored."""
            name = "LEAST_IGNORE_NULLS" if expression.args.get("ignore_nulls") else "LEAST"
            return self.func(name, expression.this, *expression.expressions)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as Snowflake's [TABLE(]FLATTEN(INPUT => ...)[)]."""
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            # The user-supplied alias column (if any) names FLATTEN's VALUE output.
            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            # FLATTEN's full output column list; a requested offset column takes
            # the place of INDEX.
            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if
isinstance(expression_parent, exp.Lateral) 1857 else f"TABLE(FLATTEN({table_input}))" 1858 ) 1859 alias = self.sql(unnest_alias) 1860 alias = f" AS {alias}" if alias else "" 1861 value = ( 1862 "" 1863 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1864 else f"{value} FROM " 1865 ) 1866 1867 return f"{value}{explode}{alias}" 1868 1869 def show_sql(self, expression: exp.Show) -> str: 1870 terse = "TERSE " if expression.args.get("terse") else "" 1871 history = " HISTORY" if expression.args.get("history") else "" 1872 like = self.sql(expression, "like") 1873 like = f" LIKE {like}" if like else "" 1874 1875 scope = self.sql(expression, "scope") 1876 scope = f" {scope}" if scope else "" 1877 1878 scope_kind = self.sql(expression, "scope_kind") 1879 if scope_kind: 1880 scope_kind = f" IN {scope_kind}" 1881 1882 starts_with = self.sql(expression, "starts_with") 1883 if starts_with: 1884 starts_with = f" STARTS WITH {starts_with}" 1885 1886 limit = self.sql(expression, "limit") 1887 1888 from_ = self.sql(expression, "from_") 1889 if from_: 1890 from_ = f" FROM {from_}" 1891 1892 privileges = self.expressions(expression, key="privileges", flat=True) 1893 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1894 1895 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1896 1897 def describe_sql(self, expression: exp.Describe) -> str: 1898 # Default to table if kind is unknown 1899 kind_value = expression.args.get("kind") or "TABLE" 1900 kind = f" {kind_value}" if kind_value else "" 1901 this = f" {self.sql(expression, 'this')}" 1902 expressions = self.expressions(expression, flat=True) 1903 expressions = f" {expressions}" if expressions else "" 1904 return f"DESCRIBE{kind}{this}{expressions}" 1905 1906 def generatedasidentitycolumnconstraint_sql( 1907 self, expression: exp.GeneratedAsIdentityColumnConstraint 1908 ) -> str: 1909 start = expression.args.get("start") 1910 start = f" 
START {start}" if start else "" 1911 increment = expression.args.get("increment") 1912 increment = f" INCREMENT {increment}" if increment else "" 1913 1914 order = expression.args.get("order") 1915 if order is not None: 1916 order_clause = " ORDER" if order else " NOORDER" 1917 else: 1918 order_clause = "" 1919 1920 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1921 1922 def cluster_sql(self, expression: exp.Cluster) -> str: 1923 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1924 1925 def struct_sql(self, expression: exp.Struct) -> str: 1926 if len(expression.expressions) == 1: 1927 arg = expression.expressions[0] 1928 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1929 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1930 return f"{{{self.sql(expression.expressions[0])}}}" 1931 1932 keys = [] 1933 values = [] 1934 1935 for i, e in enumerate(expression.expressions): 1936 if isinstance(e, exp.PropertyEQ): 1937 keys.append( 1938 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1939 ) 1940 values.append(e.expression) 1941 else: 1942 keys.append(exp.Literal.string(f"_{i}")) 1943 values.append(e) 1944 1945 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1946 1947 @unsupported_args("weight", "accuracy") 1948 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1949 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1950 1951 def alterset_sql(self, expression: exp.AlterSet) -> str: 1952 exprs = self.expressions(expression, flat=True) 1953 exprs = f" {exprs}" if exprs else "" 1954 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1955 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1956 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1957 copy_options = f" STAGE_COPY_OPTIONS = 
({copy_options})" if copy_options else "" 1958 tag = self.expressions(expression, key="tag", flat=True) 1959 tag = f" TAG {tag}" if tag else "" 1960 1961 return f"SET{exprs}{file_format}{copy_options}{tag}" 1962 1963 def strtotime_sql(self, expression: exp.StrToTime): 1964 # target_type is stored as a DataType instance 1965 target_type = expression.args.get("target_type") 1966 1967 # Get the type enum from DataType instance or from type annotation 1968 if isinstance(target_type, exp.DataType): 1969 type_enum = target_type.this 1970 elif expression.type: 1971 type_enum = expression.type.this 1972 else: 1973 type_enum = exp.DataType.Type.TIMESTAMP 1974 1975 func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP") 1976 1977 return self.func( 1978 f"{'TRY_' if expression.args.get('safe') else ''}{func_name}", 1979 expression.this, 1980 self.format_time(expression), 1981 ) 1982 1983 def timestampsub_sql(self, expression: exp.TimestampSub): 1984 return self.sql( 1985 exp.TimestampAdd( 1986 this=expression.this, 1987 expression=expression.expression * -1, 1988 unit=expression.unit, 1989 ) 1990 ) 1991 1992 def jsonextract_sql(self, expression: exp.JSONExtract): 1993 this = expression.this 1994 1995 # JSON strings are valid coming from other dialects such as BQ so 1996 # for these cases we PARSE_JSON preemptively 1997 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1998 "requires_json" 1999 ): 2000 this = exp.ParseJSON(this=this) 2001 2002 return self.func( 2003 "GET_PATH", 2004 this, 2005 expression.expression, 2006 ) 2007 2008 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 2009 this = expression.this 2010 if this.is_string: 2011 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 2012 2013 return self.func("TO_CHAR", this, self.format_time(expression)) 2014 2015 def datesub_sql(self, expression: exp.DateSub) -> str: 2016 value = expression.expression 2017 if value: 2018 value.replace(value * (-1)) 2019 else: 2020 
self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 2021 2022 return date_delta_sql("DATEADD")(self, expression) 2023 2024 def select_sql(self, expression: exp.Select) -> str: 2025 limit = expression.args.get("limit") 2026 offset = expression.args.get("offset") 2027 if offset and not limit: 2028 expression.limit(exp.Null(), copy=False) 2029 return super().select_sql(expression) 2030 2031 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 2032 is_materialized = expression.find(exp.MaterializedProperty) 2033 copy_grants_property = expression.find(exp.CopyGrantsProperty) 2034 2035 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 2036 # For materialized views, COPY GRANTS is located *before* the columns list 2037 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 2038 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 2039 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 2040 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 2041 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 2042 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 2043 2044 this_name = self.sql(expression.this, "this") 2045 copy_grants = self.sql(copy_grants_property) 2046 this_schema = self.schema_columns_sql(expression.this) 2047 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 2048 2049 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 2050 2051 return super().createable_sql(expression, locations) 2052 2053 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 2054 this = expression.this 2055 2056 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 2057 # and add it later as part of the WITHIN GROUP clause 2058 order = this if 
isinstance(this, exp.Order) else None 2059 if order: 2060 expression.set("this", order.this.pop()) 2061 2062 expr_sql = super().arrayagg_sql(expression) 2063 2064 if order: 2065 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 2066 2067 return expr_sql 2068 2069 def array_sql(self, expression: exp.Array) -> str: 2070 expressions = expression.expressions 2071 2072 first_expr = seq_get(expressions, 0) 2073 if isinstance(first_expr, exp.Select): 2074 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 2075 if first_expr.text("kind").upper() == "STRUCT": 2076 object_construct_args = [] 2077 for expr in first_expr.expressions: 2078 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 2079 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 2080 name = expr.this if isinstance(expr, exp.Alias) else expr 2081 2082 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 2083 2084 array_agg = exp.ArrayAgg( 2085 this=_build_object_construct(args=object_construct_args) 2086 ) 2087 2088 first_expr.set("kind", None) 2089 first_expr.set("expressions", [array_agg]) 2090 2091 return self.sql(first_expr.subquery()) 2092 2093 return inline_array_sql(self, expression) 2094 2095 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 2096 zone = self.sql(expression, "this") 2097 if not zone: 2098 return super().currentdate_sql(expression) 2099 2100 expr = exp.Cast( 2101 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 2102 to=exp.DataType(this=exp.DataType.Type.DATE), 2103 ) 2104 return self.sql(expr) 2105 2106 def dot_sql(self, expression: exp.Dot) -> str: 2107 this = expression.this 2108 2109 if not this.type: 2110 from sqlglot.optimizer.annotate_types import annotate_types 2111 2112 this = annotate_types(this, dialect=self.dialect) 2113 2114 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 2115 # 
Generate colon notation for the top level STRUCT 2116 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 2117 2118 return super().dot_sql(expression) 2119 2120 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 2121 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 2122 2123 def format_sql(self, expression: exp.Format) -> str: 2124 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 2125 return self.func("TO_CHAR", expression.expressions[0]) 2126 2127 return self.function_fallback_sql(expression) 2128 2129 def splitpart_sql(self, expression: exp.SplitPart) -> str: 2130 # Set part_index to 1 if missing 2131 if not expression.args.get("delimiter"): 2132 expression.set("delimiter", exp.Literal.string(" ")) 2133 2134 if not expression.args.get("part_index"): 2135 expression.set("part_index", exp.Literal.number(1)) 2136 2137 return rename_func("SPLIT_PART")(self, expression) 2138 2139 def uniform_sql(self, expression: exp.Uniform) -> str: 2140 gen = expression.args.get("gen") 2141 seed = expression.args.get("seed") 2142 2143 # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed) 2144 if seed: 2145 gen = exp.Rand(this=seed) 2146 2147 # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM() 2148 if not gen: 2149 gen = exp.Rand() 2150 2151 return self.func("UNIFORM", expression.this, expression.expression, gen)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, handling Snowflake's VECTOR and OBJECT special cases.

    - A DOUBLE nested directly inside a VECTOR is emitted as FLOAT, since
      VECTOR element types do not accept the DOUBLE synonym.
    - Struct types whose fields are bare data types collapse to plain OBJECT;
      string field keys are rewritten to quoted identifiers in place.
    """
    # Check if this is a FLOAT type nested inside a VECTOR type
    # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
    # https://docs.snowflake.com/en/sql-reference/data-types-vector
    if expression.is_type(exp.DataType.Type.DOUBLE):
        parent = expression.parent
        if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
            # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
            return "FLOAT"

    expressions = expression.expressions
    if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
        for field_type in expressions:
            # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
            # A bare type (no key) can't be represented in that form, so fall
            # back to an untyped OBJECT for the whole struct.
            if isinstance(field_type, exp.DataType):
                return "OBJECT"
            if (
                isinstance(field_type, exp.ColumnDef)
                and field_type.this
                and field_type.this.is_string
            ):
                # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides
                # converting 'foo' into an identifier, we also need to quote it because these
                # keys are case-sensitive. For example:
                #
                # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

    return super().datatype_sql(expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS, folding a millisecond argument into nanoseconds."""
    # Snowflake's TIMESTAMP_FROM_PARTS takes a nanosecond component rather than
    # milliseconds, so rescale the "milli" arg (1 ms == 1_000_000 ns) when present.
    millis = expression.args.get("milli")
    if millis is not None:
        expression.set("nano", millis.pop() * exp.Literal.number(1000000))

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render a CAST; spatial targets use Snowflake's TO_GEOGRAPHY / TO_GEOMETRY."""
    # Snowflake expresses casts to spatial types as conversion functions
    for spatial_type, func_name in (
        (exp.DataType.Type.GEOGRAPHY, "TO_GEOGRAPHY"),
        (exp.DataType.Type.GEOMETRY, "TO_GEOMETRY"),
    ):
        if expression.is_type(spatial_type):
            return self.func(func_name, expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST, downgrading to a plain CAST when the value isn't a string.

    Snowflake's TRY_CAST only accepts string inputs; for any other (or unknown)
    input type the expression is rendered as a regular CAST instead.
    """
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        # Annotate lazily so we can inspect the value's resolved type below
        value = annotate_types(value, dialect=self.dialect)

    # Snowflake requires that TRY_CAST's value be a string
    # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
    # if we can deduce that the value is a string, then we can generate TRY_CAST
    if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
        return super().trycast_sql(expression)

    return self.cast_sql(expression)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Render UNNEST as a Snowflake (TABLE(...)) FLATTEN call.

    FLATTEN exposes the fixed column set (seq, key, path, index, value, this),
    so an explicit alias column list is built to line up with those positions:
    the caller's first alias column (if any) becomes the "value" column, and a
    parsed OFFSET expression takes the "index" slot.
    """
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    unnest_alias_columns = unnest_alias.columns if unnest_alias else []
    value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

    # Columns in FLATTEN's output order; offset (WITH ORDINALITY-style) replaces
    # the default "index" name when present
    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        value,
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        # No caller-supplied alias: synthesize one so columns stay addressable
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    # FLATTEN takes its subject via the named INPUT => argument
    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    expression_parent = expression.parent

    # LATERAL FLATTEN(...) doesn't take the TABLE() wrapper; standalone uses do
    explode = (
        f"FLATTEN({table_input})"
        if isinstance(expression_parent, exp.Lateral)
        else f"TABLE(FLATTEN({table_input}))"
    )
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    # When UNNEST appears outside a FROM/JOIN/LATERAL context, project the value
    # column in front (e.g. "value FROM TABLE(FLATTEN(...))")
    value = (
        ""
        if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
        else f"{value} FROM "
    )

    return f"{value}{explode}{alias}"
def show_sql(self, expression: exp.Show) -> str:
    """Render a SHOW command with Snowflake's optional modifier clauses.

    Assembles, in order: TERSE, the object kind (expression.name), HISTORY,
    LIKE, IN <scope_kind> <scope>, STARTS WITH, LIMIT, FROM, WITH PRIVILEGES.
    Each clause is emitted only when its argument is present.
    """
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    # NOTE(review): limit is interpolated as-is — presumably limit_sql already
    # includes its own leading space; confirm against the base generator
    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from_")
    if from_:
        from_ = f" FROM {from_}"

    privileges = self.expressions(expression, key="privileges", flat=True)
    privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
def describe_sql(self, expression: exp.Describe) -> str:
    """Render a DESCRIBE statement, defaulting the object kind to TABLE.

    The `or "TABLE"` fallback guarantees kind_value is always a non-empty
    string, so the former `if kind_value else ""` guard on the next line was
    unreachable dead code and has been removed.
    """
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}"
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render an identity column as AUTOINCREMENT [START n] [INCREMENT n] [ORDER|NOORDER]."""
    parts = ["AUTOINCREMENT"]

    start = expression.args.get("start")
    if start:
        parts.append(f" START {start}")

    increment = expression.args.get("increment")
    if increment:
        parts.append(f" INCREMENT {increment}")

    # Tri-state flag: True -> ORDER, False -> NOORDER, None -> omit the clause
    order = expression.args.get("order")
    if order is not None:
        parts.append(" ORDER" if order else " NOORDER")

    return "".join(parts)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a struct literal as OBJECT_CONSTRUCT (or wildcard {...} syntax)."""
    exprs = expression.expressions

    if len(exprs) == 1:
        only = exprs[0]
        # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
        if only.is_star or (isinstance(only, exp.ILike) and only.left.is_star):
            return f"{{{self.sql(only)}}}"

    # OBJECT_CONSTRUCT takes alternating key, value arguments
    args = []
    for i, e in enumerate(exprs):
        if isinstance(e, exp.PropertyEQ):
            # Named field: identifier keys become string literals
            key = exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            args.extend((key, e.expression))
        else:
            # Positional field: synthesize a "_<index>" key
            args.extend((exp.Literal.string(f"_{i}"), e))

    return self.func("OBJECT_CONSTRUCT", *args)
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render the SET clause of an ALTER statement (stage options use `=` syntax)."""
    segments = ["SET"]

    props = self.expressions(expression, flat=True)
    if props:
        segments.append(f" {props}")

    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    if file_format:
        segments.append(f" STAGE_FILE_FORMAT = ({file_format})")

    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    if copy_options:
        segments.append(f" STAGE_COPY_OPTIONS = ({copy_options})")

    tag = self.expressions(expression, key="tag", flat=True)
    if tag:
        segments.append(f" TAG {tag}")

    return "".join(segments)
def strtotime_sql(self, expression: exp.StrToTime) -> str:
    """Render STR_TO_TIME as TO_TIMESTAMP[_LTZ|_NTZ|_TZ] (TRY_-prefixed if safe).

    The target timestamp variant is taken from the explicit target_type arg
    when present, then from the expression's annotated type, and finally
    defaults to plain TIMESTAMP.
    """
    # target_type is stored as a DataType instance
    target_type = expression.args.get("target_type")

    # Get the type enum from DataType instance or from type annotation
    if isinstance(target_type, exp.DataType):
        type_enum = target_type.this
    elif expression.type:
        type_enum = expression.type.this
    else:
        type_enum = exp.DataType.Type.TIMESTAMP

    # TIMESTAMP_TYPES maps each timestamp variant to its TO_* function name
    func_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")

    return self.func(
        f"{'TRY_' if expression.args.get('safe') else ''}{func_name}",
        expression.this,
        self.format_time(expression),
    )
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    """Render JSON extraction as GET_PATH, parsing raw JSON strings first."""
    this = expression.this

    # JSON strings are valid coming from other dialects such as BQ so
    # for these cases we PARSE_JSON preemptively; skip the wrap when the value
    # is already semi-structured (ParseJSON / nested JSONExtract) or the
    # expression is flagged as requiring JSON input
    if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
        "requires_json"
    ):
        this = exp.ParseJSON(this=this)

    return self.func(
        "GET_PATH",
        this,
        expression.expression,
    )
def datesub_sql(self, expression: exp.DateSub) -> str:
    """Transpile DATE_SUB into Snowflake's DATEADD with a negated count."""
    count = expression.expression
    if count:
        # Negate the count in place so the shared DATEADD generator can be reused
        count.replace(count * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Render the creatable part of CREATE, special-casing materialized views.

    For CREATE MATERIALIZED VIEW ... COPY GRANTS, the COPY GRANTS property is
    moved out of the POST_SCHEMA property bucket and emitted between the view
    name and its column list.
    """
    is_materialized = expression.find(exp.MaterializedProperty)
    copy_grants_property = expression.find(exp.CopyGrantsProperty)

    if expression.kind == "VIEW" and is_materialized and copy_grants_property:
        # For materialized views, COPY GRANTS is located *before* the columns list
        # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
        # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
        post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
        # Remove COPY GRANTS from its default bucket so it isn't rendered twice
        post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

        this_name = self.sql(expression.this, "this")
        copy_grants = self.sql(copy_grants_property)
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

    return super().createable_sql(expression, locations)
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    """Render ARRAY_AGG, moving any ORDER BY into a WITHIN GROUP clause."""
    this = expression.this

    # No ordering attached: plain ARRAY_AGG rendering
    if not isinstance(this, exp.Order):
        return super().arrayagg_sql(expression)

    # Detach the ordered operand so ARRAY_AGG renders without the ORDER BY...
    expression.set("this", this.this.pop())
    agg_sql = super().arrayagg_sql(expression)

    # ...then reattach the ordering as WITHIN GROUP (ORDER BY ...)
    return self.sql(exp.WithinGroup(this=agg_sql, expression=this))
def array_sql(self, expression: exp.Array) -> str:
    """Render an ARRAY literal, rewriting ARRAY(SELECT AS STRUCT ...) inputs.

    A `SELECT AS STRUCT` subquery (e.g. from BigQuery) is rewritten into a
    subquery projecting ARRAY_AGG(OBJECT_CONSTRUCT(...)); anything else falls
    through to the inline array syntax.
    """
    expressions = expression.expressions

    first_expr = seq_get(expressions, 0)
    if isinstance(first_expr, exp.Select):
        # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
        if first_expr.text("kind").upper() == "STRUCT":
            object_construct_args = []
            for expr in first_expr.expressions:
                # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                name = expr.this if isinstance(expr, exp.Alias) else expr

                object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

            # _build_object_construct is a module-level helper defined elsewhere in this file
            array_agg = exp.ArrayAgg(
                this=_build_object_construct(args=object_construct_args)
            )

            # Drop the STRUCT marker and replace the projections with the aggregate
            first_expr.set("kind", None)
            first_expr.set("expressions", [array_agg])

            return self.sql(first_expr.subquery())

    return inline_array_sql(self, expression)
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE; a zone argument becomes CONVERT_TIMEZONE(...) cast to DATE."""
    zone = self.sql(expression, "this")
    if not zone:
        # No timezone argument: fall back to the default CURRENT_DATE rendering
        return super().currentdate_sql(expression)

    # CURRENT_DATE(tz) -> CAST(CONVERT_TIMEZONE(tz, CURRENT_TIMESTAMP()) AS DATE)
    converted = exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp())
    return self.sql(
        exp.Cast(this=converted, to=exp.DataType(this=exp.DataType.Type.DATE))
    )
def dot_sql(self, expression: exp.Dot) -> str:
    """Render dotted access, using colon notation for top-level STRUCT values."""
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        # Resolve the operand's type lazily so we can detect STRUCT access
        this = annotate_types(this, dialect=self.dialect)

    if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
        # Generate colon notation for the top level STRUCT
        return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

    return super().dot_sql(expression)
def splitpart_sql(self, expression: exp.SplitPart) -> str:
    """Render SPLIT_PART, filling in the documented argument defaults."""
    # Default the delimiter to a single space if missing
    if not expression.args.get("delimiter"):
        expression.set("delimiter", exp.Literal.string(" "))

    # Default part_index to 1 if missing
    if not expression.args.get("part_index"):
        expression.set("part_index", exp.Literal.number(1))

    return rename_func("SPLIT_PART")(self, expression)
def uniform_sql(self, expression: exp.Uniform) -> str:
    """Render UNIFORM(min, max, gen), synthesizing the generator when absent."""
    seed = expression.args.get("seed")

    if seed:
        # From Databricks UNIFORM(min, max, seed) -> wrap the seed in RANDOM(seed)
        gen = exp.Rand(this=seed)
    else:
        gen = expression.args.get("gen")

    if not gen:
        # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> add RANDOM()
        gen = exp.Rand()

    return self.func("UNIFORM", expression.this, expression.expression, gen)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- UPDATE_STATEMENT_SUPPORTS_FROM
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql