sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.annotate_types import TypeAnnotator
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


def _build_strtok(args: t.List) -> exp.SplitPart:
    # Add default delimiter (space) if missing - per Snowflake docs
    if len(args) == 1:
        args.append(exp.Literal.string(" "))

    # Add default part_index (1) if missing
    if len(args) == 2:
        args.append(exp.Literal.number(1))

    return exp.SplitPart.from_arg_list(args)


def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )
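

# Illustrative examples (approximate AST shapes) for the builder above: literal
# key/value pairs become a Struct of PropertyEQ nodes, while a star argument
# yields a StarMap:
#
#     OBJECT_CONSTRUCT('a', 1) ~> exp.Struct(expressions=[exp.PropertyEQ('a', 1)])
#     OBJECT_CONSTRUCT(*)      ~> exp.StarMap(...)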


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/div0null
def _build_if_from_div0null(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    # Returns 0 when the divisor is 0 OR NULL
    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).or_(
        exp.Is(this=rhs, expression=exp.null())
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


def _build_search(args: t.List) -> exp.Search:
    kwargs = {
        "this": seq_get(args, 0),
        "expression": seq_get(args, 1),
        **{arg.name.lower(): arg for arg in args[2:] if isinstance(arg, exp.Kwarg)},
    }
    return exp.Search(**kwargs)


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
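

# Illustrative, approximate transpilations of the DIV0/DIV0NULL builders above;
# both parse into conditionals instead of opaque function calls:
#
#     DIV0(a, b)     ~> IFF(b = 0 AND NOT a IS NULL, 0, a / b)
#     DIV0NULL(a, b) ~> IFF(b = 0 OR b IS NULL, 0, a / b)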


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression
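

# Approximate effect of the transform above: outside of Iceberg tables, nested type
# parameters are dropped because vanilla Snowflake doesn't accept them, e.g.
#
#     CREATE TABLE t (c ARRAY(INT))          ~> CREATE TABLE t (c ARRAY)
#     CREATE ICEBERG TABLE t (c ARRAY(INT))  ~> unchanged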


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll be
            # unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _build_like(expr_type: t.Type[E]) -> t.Callable[[t.List], E | exp.Escape]:
    def _builder(args: t.List) -> E | exp.Escape:
        like_expr = expr_type(this=args[0], expression=args[1])
        escape = seq_get(args, 2)
        return exp.Escape(this=like_expr, expression=escape) if escape else like_expr

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
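

# Approximate examples of _regexpextract_sql's default backfilling: optional
# arguments are materialized only up to the last one that is set, e.g.
#
#     group unset:  REGEXP_SUBSTR(subject, pattern)
#     group = 2:    REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 2)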


def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression
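

# Rough sketch of where the helper above fits: the exp.Select preprocessors
# registered in Generator.TRANSFORMS below chain it with the other UNNEST rewrites,
# so a BigQuery-style `SELECT x FROM UNNEST([1, 2]) AS x` ends up as a
# TABLE(FLATTEN(INPUT => ...)) source with FLATTEN's canonical
# (SEQ, KEY, PATH, INDEX, VALUE, THIS) column aliases on the Snowflake side.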


def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    expression = self._annotate_by_args(expression, "this")
    if expression.is_type(exp.DataType.Type.NULL):
        # Snowflake treats REVERSE(NULL) as a VARCHAR
        self._set_type(expression, exp.DataType.Type.VARCHAR)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.DOUBLE: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DOUBLE],
            exp.Cos,
            exp.Cosh,
            exp.Cot,
            exp.Degrees,
            exp.Exp,
            exp.Sin,
            exp.Tan,
            exp.Asin,
            exp.Atanh,
            exp.Cbrt,
        },
        exp.DataType.Type.INT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT],
            exp.Ascii,
            exp.ByteLength,
            exp.Length,
            exp.RtrimmedLength,
            exp.BitLength,
            exp.Levenshtein,
            exp.JarowinklerSimilarity,
            exp.StrPosition,
            exp.Unicode,
        },
        exp.DataType.Type.VARCHAR: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
            exp.Base64DecodeString,
            exp.TryBase64DecodeString,
            exp.Base64Encode,
            exp.DecompressString,
            exp.MD5,
            exp.AIAgg,
            exp.AIClassify,
            exp.AISummarizeAgg,
            exp.Chr,
            exp.Collate,
            exp.Collation,
            exp.HexDecodeString,
            exp.TryHexDecodeString,
            exp.HexEncode,
            exp.Initcap,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.SHA,
            exp.SHA2,
            exp.Soundex,
            exp.SoundexP123,
            exp.Space,
            exp.SplitPart,
            exp.Translate,
            exp.Uuid,
        },
        exp.DataType.Type.BINARY: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
            exp.Base64DecodeBinary,
            exp.TryBase64DecodeBinary,
            exp.TryHexDecodeBinary,
            exp.Compress,
            exp.DecompressBinary,
            exp.MD5Digest,
            exp.SHA1Digest,
            exp.SHA2Digest,
            exp.Unhex,
        },
        exp.DataType.Type.BIGINT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
            exp.Factorial,
            exp.MD5NumberLower64,
            exp.MD5NumberUpper64,
        },
        exp.DataType.Type.ARRAY: {
            exp.Split,
            exp.RegexpExtractAll,
            exp.StringToArray,
        },
        exp.DataType.Type.OBJECT: {
            exp.ParseUrl,
            exp.ParseIp,
        },
        exp.DataType.Type.DECIMAL: {
            exp.RegexpCount,
        },
        exp.DataType.Type.BOOLEAN: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BOOLEAN],
            exp.Search,
        },
    }

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Floor,
                exp.Left,
                exp.Pad,
                exp.Right,
                exp.Stuff,
                exp.Substring,
                exp.Round,
            )
        },
        **{
            expr_type: lambda self, e: self._annotate_with_type(
                e, exp.DataType.build("NUMBER", dialect="snowflake")
            )
            for expr_type in (
                exp.RegexpCount,
                exp.RegexpInstr,
            )
        },
        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Reverse: _annotate_reverse,
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
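
    # Approximate example of the special case above: under identify=True,
    # SELECT 1 FROM DUAL keeps DUAL unquoted so Snowflake still treats it as the
    # pseudo-table, while other table identifiers are quoted as usual.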

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
            "DIV0": _build_if_from_div0,
            "DIV0NULL": _build_if_from_div0null,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "PARSE_URL": lambda args: exp.ParseUrl(
                this=seq_get(args, 0), permissive=seq_get(args, 1)
            ),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "STRTOK": _build_strtok,
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
            "LIKE": _build_like(exp.Like),
            "ILIKE": _build_like(exp.ILike),
            "SEARCH": _build_search,
        }
        FUNCTIONS.pop("PREDICT")
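
        # Approximate examples of the _build_datetime-based entries above:
        #
        #     TO_TIMESTAMP('2020-01-01')       ~> CAST('2020-01-01' AS TIMESTAMP)
        #     TO_TIMESTAMP(1748454400)         ~> exp.UnixToTime (integer epoch)
        #     TO_TIMESTAMP(col, 'YYYY-MM-DD')  ~> exp.StrToTime with format '%Y-%m-%d'
        #     TRY_TO_DATE('2020-01-01')        ~> TRY_CAST('2020-01-01' AS DATE)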

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "DIRECTORY": lambda self: self._parse_directory(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }
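
        # Approximate example of TYPE_CONVERTERS above: an unparameterized DECIMAL
        # column type is normalized to Snowflake's NUMBER default, i.e.
        # DECIMAL ~> DECIMAL(38, 0).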

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_directory(self) -> exp.DirectoryStage:
            table = self._parse_table_parts()

            if isinstance(table, exp.Table):
                table = table.this

            return self.expression(exp.DirectoryStage, this=table)

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()
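
        # Approximate examples of _parse_use above:
        #
        #     USE SECONDARY ROLES ALL     ~> exp.Use(kind="SECONDARY ROLES", this=ALL)
        #     USE SECONDARY ROLES r1, r2  ~> exp.Use(kind="SECONDARY ROLES", expressions=[r1, r2])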

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
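
        # Approximate example of the epoch branch in _parse_date_part above:
        #
        #     DATE_PART(EPOCH_MILLISECOND, col)
        #         ~> EXTRACT(epoch_second FROM CAST(col AS TIMESTAMP)) * 1000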

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
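
        # Approximate example of _parse_id_var above: IDENTIFIER('my_table') is kept
        # as exp.Anonymous(this="IDENTIFIER", ...) so the call round-trips verbatim
        # instead of collapsing into a plain identifier.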

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # Will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this
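
        # Approximate example of the staged-file parsing above: a query like
        #
        #     SELECT * FROM @my_stage/some/path (FILE_FORMAT => 'csv', PATTERN => '.*[.]csv')
        #
        # produces an exp.Table whose `this` is a Var holding the raw @-location,
        # with the file format and pattern attached as table args.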

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # Inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # Out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False
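
        # Approximate examples for a couple of the flags above: STAR_EXCEPT = "EXCLUDE"
        # renders star exclusions as SELECT * EXCLUDE (...), and INSERT_OVERWRITE
        # yields Snowflake's INSERT OVERWRITE INTO syntax for overwrite inserts.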

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func(
                "ARRAY_CONTAINS",
                e.expression
                if e.args.get("ensure_variant") is False
                else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False),
                e.this,
            ),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
        }
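
        # Approximate examples of the TRANSFORMS above:
        #
        #     exp.ArgMax     ~> MAX_BY(...)
        #     exp.If         ~> IFF(cond, true, false)
        #     exp.StartsWith ~> STARTSWITH(...)
        #     exp.Select     ~> preprocessed (DISTINCT ON elimination, UNNEST rewrites, ...)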

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
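            # (see also the hedged TYPE_MAPPING examples after this dict)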
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Approximate examples of TYPE_MAPPING above: TEXT renders as VARCHAR and
        # STRUCT/NESTED render as OBJECT, so CAST(x AS TEXT) ~> CAST(x AS VARCHAR).

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string. If TRY_CAST is being
            # roundtripped (since Snowflake is the only dialect that sets "requires_string"),
            # or if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)
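
        # Approximate illustration of trycast_sql above: TRY_CAST('5' AS INT) keeps
        # TRY_CAST (string operand), while TRY_CAST(1.5 AS INT) degrades to CAST
        # because Snowflake's TRY_CAST only accepts string inputs.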
unnest_sql(self, expression: exp.Unnest) -> str: 1685 unnest_alias = expression.args.get("alias") 1686 offset = expression.args.get("offset") 1687 1688 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1689 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1690 1691 columns = [ 1692 exp.to_identifier("seq"), 1693 exp.to_identifier("key"), 1694 exp.to_identifier("path"), 1695 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1696 value, 1697 exp.to_identifier("this"), 1698 ] 1699 1700 if unnest_alias: 1701 unnest_alias.set("columns", columns) 1702 else: 1703 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1704 1705 table_input = self.sql(expression.expressions[0]) 1706 if not table_input.startswith("INPUT =>"): 1707 table_input = f"INPUT => {table_input}" 1708 1709 expression_parent = expression.parent 1710 1711 explode = ( 1712 f"FLATTEN({table_input})" 1713 if isinstance(expression_parent, exp.Lateral) 1714 else f"TABLE(FLATTEN({table_input}))" 1715 ) 1716 alias = self.sql(unnest_alias) 1717 alias = f" AS {alias}" if alias else "" 1718 value = ( 1719 "" 1720 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1721 else f"{value} FROM " 1722 ) 1723 1724 return f"{value}{explode}{alias}" 1725 1726 def show_sql(self, expression: exp.Show) -> str: 1727 terse = "TERSE " if expression.args.get("terse") else "" 1728 history = " HISTORY" if expression.args.get("history") else "" 1729 like = self.sql(expression, "like") 1730 like = f" LIKE {like}" if like else "" 1731 1732 scope = self.sql(expression, "scope") 1733 scope = f" {scope}" if scope else "" 1734 1735 scope_kind = self.sql(expression, "scope_kind") 1736 if scope_kind: 1737 scope_kind = f" IN {scope_kind}" 1738 1739 starts_with = self.sql(expression, "starts_with") 1740 if starts_with: 1741 starts_with = f" STARTS WITH {starts_with}" 1742 1743 limit = self.sql(expression, "limit") 1744 1745 from_ = self.sql(expression, "from") 1746 if from_: 1747 from_ = f" FROM {from_}" 1748 1749 privileges = self.expressions(expression, key="privileges", flat=True) 1750 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1751 1752 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1753 1754 def describe_sql(self, expression: exp.Describe) -> str: 1755 # Default to table if kind is unknown 1756 kind_value = expression.args.get("kind") or "TABLE" 1757 kind = f" {kind_value}" if kind_value else "" 1758 this = f" {self.sql(expression, 'this')}" 1759 expressions = self.expressions(expression, flat=True) 1760 expressions = f" {expressions}" if expressions else "" 1761 return f"DESCRIBE{kind}{this}{expressions}" 1762 1763 def generatedasidentitycolumnconstraint_sql( 1764 self, expression: exp.GeneratedAsIdentityColumnConstraint 1765 ) -> str: 1766 start = expression.args.get("start") 1767 start = f" START {start}" if start else "" 1768 increment = expression.args.get("increment") 1769 increment = f" INCREMENT {increment}" if increment else "" 1770 1771 order = expression.args.get("order") 1772 if order is not None: 1773 order_clause = " ORDER" if order else " NOORDER" 1774 else: 1775 order_clause = "" 1776 1777 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1778 1779 def cluster_sql(self, expression: exp.Cluster) -> str: 1780 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1781 1782 def struct_sql(self, expression: exp.Struct) -> str: 1783 if 
len(expression.expressions) == 1: 1784 arg = expression.expressions[0] 1785 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1786 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1787 return f"{{{self.sql(expression.expressions[0])}}}" 1788 1789 keys = [] 1790 values = [] 1791 1792 for i, e in enumerate(expression.expressions): 1793 if isinstance(e, exp.PropertyEQ): 1794 keys.append( 1795 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1796 ) 1797 values.append(e.expression) 1798 else: 1799 keys.append(exp.Literal.string(f"_{i}")) 1800 values.append(e) 1801 1802 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1803 1804 @unsupported_args("weight", "accuracy") 1805 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1806 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1807 1808 def alterset_sql(self, expression: exp.AlterSet) -> str: 1809 exprs = self.expressions(expression, flat=True) 1810 exprs = f" {exprs}" if exprs else "" 1811 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1812 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1813 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1814 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1815 tag = self.expressions(expression, key="tag", flat=True) 1816 tag = f" TAG {tag}" if tag else "" 1817 1818 return f"SET{exprs}{file_format}{copy_options}{tag}" 1819 1820 def strtotime_sql(self, expression: exp.StrToTime): 1821 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1822 return self.func( 1823 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1824 ) 1825 1826 def timestampsub_sql(self, expression: exp.TimestampSub): 1827 return self.sql( 1828 exp.TimestampAdd( 1829 this=expression.this, 1830 expression=expression.expression * -1, 1831 unit=expression.unit, 1832 ) 1833 ) 1834 1835 def jsonextract_sql(self, expression: exp.JSONExtract): 1836 this = expression.this 1837 1838 # JSON strings are valid coming from other dialects such as BQ so 1839 # for these cases we PARSE_JSON preemptively 1840 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1841 "requires_json" 1842 ): 1843 this = exp.ParseJSON(this=this) 1844 1845 return self.func( 1846 "GET_PATH", 1847 this, 1848 expression.expression, 1849 ) 1850 1851 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1852 this = expression.this 1853 if this.is_string: 1854 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1855 1856 return self.func("TO_CHAR", this, self.format_time(expression)) 1857 1858 def datesub_sql(self, expression: exp.DateSub) -> str: 1859 value = expression.expression 1860 if value: 1861 value.replace(value * (-1)) 1862 else: 1863 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1864 1865 return date_delta_sql("DATEADD")(self, expression) 1866 1867 def select_sql(self, expression: exp.Select) -> str: 1868 limit = expression.args.get("limit") 1869 offset = expression.args.get("offset") 1870 if offset and not limit: 1871 expression.limit(exp.Null(), copy=False) 1872 return super().select_sql(expression) 1873 1874 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1875 is_materialized = expression.find(exp.MaterializedProperty) 1876 
copy_grants_property = expression.find(exp.CopyGrantsProperty) 1877 1878 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1879 # For materialized views, COPY GRANTS is located *before* the columns list 1880 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1881 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1882 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1883 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1884 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1885 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1886 1887 this_name = self.sql(expression.this, "this") 1888 copy_grants = self.sql(copy_grants_property) 1889 this_schema = self.schema_columns_sql(expression.this) 1890 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1891 1892 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1893 1894 return super().createable_sql(expression, locations) 1895 1896 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1897 this = expression.this 1898 1899 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1900 # and add it later as part of the WITHIN GROUP clause 1901 order = this if isinstance(this, exp.Order) else None 1902 if order: 1903 expression.set("this", order.this.pop()) 1904 1905 expr_sql = super().arrayagg_sql(expression) 1906 1907 if order: 1908 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1909 1910 return expr_sql 1911 1912 def array_sql(self, expression: exp.Array) -> str: 1913 expressions = expression.expressions 1914 1915 first_expr = seq_get(expressions, 0) 1916 if isinstance(first_expr, exp.Select): 1917 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1918 if first_expr.text("kind").upper() == "STRUCT": 1919 object_construct_args = [] 1920 for expr in first_expr.expressions: 1921 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1922 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1923 name = expr.this if isinstance(expr, exp.Alias) else expr 1924 1925 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1926 1927 array_agg = exp.ArrayAgg( 1928 this=_build_object_construct(args=object_construct_args) 1929 ) 1930 1931 first_expr.set("kind", None) 1932 first_expr.set("expressions", [array_agg]) 1933 1934 return self.sql(first_expr.subquery()) 1935 1936 return inline_array_sql(self, expression) 1937 1938 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1939 zone = self.sql(expression, "this") 1940 if not zone: 1941 return super().currentdate_sql(expression) 1942 1943 expr = exp.Cast( 1944 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1945 to=exp.DataType(this=exp.DataType.Type.DATE), 1946 ) 1947 return self.sql(expr) 1948 1949 def dot_sql(self, expression: exp.Dot) -> str: 1950 this = expression.this 1951 1952 if not this.type: 1953 from sqlglot.optimizer.annotate_types import annotate_types 1954 1955 this = annotate_types(this, dialect=self.dialect) 1956 1957 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1958 # Generate colon notation for the top level STRUCT 1959 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1960 1961 return 
super().dot_sql(expression) 1962 1963 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1964 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1965 1966 def format_sql(self, expression: exp.Format) -> str: 1967 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1968 return self.func("TO_CHAR", expression.expressions[0]) 1969 1970 return self.function_fallback_sql(expression) 1971 1972 def splitpart_sql(self, expression: exp.SplitPart) -> str: 1973 # Set part_index to 1 if missing 1974 if not expression.args.get("delimiter"): 1975 expression.set("delimiter", exp.Literal.string(" ")) 1976 1977 if not expression.args.get("part_index"): 1978 expression.set("part_index", exp.Literal.number(1)) 1979 1980 return rename_func("SPLIT_PART")(self, expression)
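# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module source; outputs are
# indicative, not guaranteed): the class below plugs into sqlglot's public
# API, so Snowflake-specific syntax round-trips through canonical expressions.
#
#     import sqlglot
#
#     # IFF(...) parses into exp.If via Snowflake.Parser.FUNCTIONS:
#     ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg')", read="snowflake")
#
#     # exp.If is rendered back as IFF via Snowflake.Generator.TRANSFORMS:
#     sql = sqlglot.transpile("SELECT IF(x > 0, 1, 0)", read="duckdb", write="snowflake")[0]
# ---------------------------------------------------------------------------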
551class Snowflake(Dialect): 552 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 553 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 554 NULL_ORDERING = "nulls_are_large" 555 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 556 SUPPORTS_USER_DEFINED_TYPES = False 557 SUPPORTS_SEMI_ANTI_JOIN = False 558 PREFER_CTE_ALIAS_COLUMN = True 559 TABLESAMPLE_SIZE_IS_PERCENT = True 560 COPY_PARAMS_ARE_CSV = False 561 ARRAY_AGG_INCLUDES_NULLS = None 562 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 563 TRY_CAST_REQUIRES_STRING = True 564 565 TYPE_TO_EXPRESSIONS = { 566 **Dialect.TYPE_TO_EXPRESSIONS, 567 exp.DataType.Type.DOUBLE: { 568 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DOUBLE], 569 exp.Cos, 570 exp.Cosh, 571 exp.Cot, 572 exp.Degrees, 573 exp.Exp, 574 exp.Sin, 575 exp.Tan, 576 exp.Asin, 577 exp.Atanh, 578 exp.Cbrt, 579 }, 580 exp.DataType.Type.INT: { 581 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT], 582 exp.Ascii, 583 exp.ByteLength, 584 exp.Length, 585 exp.RtrimmedLength, 586 exp.BitLength, 587 exp.Levenshtein, 588 exp.JarowinklerSimilarity, 589 exp.StrPosition, 590 exp.Unicode, 591 }, 592 exp.DataType.Type.VARCHAR: { 593 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR], 594 exp.Base64DecodeString, 595 exp.TryBase64DecodeString, 596 exp.Base64Encode, 597 exp.DecompressString, 598 exp.MD5, 599 exp.AIAgg, 600 exp.AIClassify, 601 exp.AISummarizeAgg, 602 exp.Chr, 603 exp.Collate, 604 exp.Collation, 605 exp.HexDecodeString, 606 exp.TryHexDecodeString, 607 exp.HexEncode, 608 exp.Initcap, 609 exp.RegexpExtract, 610 exp.RegexpReplace, 611 exp.Repeat, 612 exp.Replace, 613 exp.SHA, 614 exp.SHA2, 615 exp.Soundex, 616 exp.SoundexP123, 617 exp.Space, 618 exp.SplitPart, 619 exp.Translate, 620 exp.Uuid, 621 }, 622 exp.DataType.Type.BINARY: { 623 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY], 624 exp.Base64DecodeBinary, 625 exp.TryBase64DecodeBinary, 626 exp.TryHexDecodeBinary, 627 exp.Compress, 628 exp.DecompressBinary, 629 exp.MD5Digest, 630 exp.SHA1Digest, 631 exp.SHA2Digest, 632 exp.Unhex, 633 }, 634 exp.DataType.Type.BIGINT: { 635 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT], 636 exp.Factorial, 637 exp.MD5NumberLower64, 638 exp.MD5NumberUpper64, 639 }, 640 exp.DataType.Type.ARRAY: { 641 exp.Split, 642 exp.RegexpExtractAll, 643 exp.StringToArray, 644 }, 645 exp.DataType.Type.OBJECT: { 646 exp.ParseUrl, 647 exp.ParseIp, 648 }, 649 exp.DataType.Type.DECIMAL: { 650 exp.RegexpCount, 651 }, 652 exp.DataType.Type.BOOLEAN: { 653 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BOOLEAN], 654 exp.Search, 655 }, 656 } 657 658 ANNOTATORS = { 659 **Dialect.ANNOTATORS, 660 **{ 661 expr_type: annotate_with_type_lambda(data_type) 662 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 663 for expr_type in expressions 664 }, 665 **{ 666 expr_type: lambda self, e: self._annotate_by_args(e, "this") 667 for expr_type in ( 668 exp.Floor, 669 exp.Left, 670 exp.Pad, 671 exp.Right, 672 exp.Stuff, 673 exp.Substring, 674 exp.Round, 675 ) 676 }, 677 **{ 678 expr_type: lambda self, e: self._annotate_with_type( 679 e, exp.DataType.build("NUMBER", dialect="snowflake") 680 ) 681 for expr_type in ( 682 exp.RegexpCount, 683 exp.RegexpInstr, 684 ) 685 }, 686 exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"), 687 exp.Reverse: _annotate_reverse, 688 } 689 690 TIME_MAPPING = { 691 "YYYY": "%Y", 692 "yyyy": "%Y", 693 "YY": "%y", 694 "yy": "%y", 695 "MMMM": "%B", 696 "mmmm": "%B", 697 "MON": "%b", 698 "mon": "%b", 699 "MM": "%m", 700 "mm": "%m", 701 "DD": "%d", 
702 "dd": "%-d", 703 "DY": "%a", 704 "dy": "%w", 705 "HH24": "%H", 706 "hh24": "%H", 707 "HH12": "%I", 708 "hh12": "%I", 709 "MI": "%M", 710 "mi": "%M", 711 "SS": "%S", 712 "ss": "%S", 713 "FF6": "%f", 714 "ff6": "%f", 715 } 716 717 DATE_PART_MAPPING = { 718 **Dialect.DATE_PART_MAPPING, 719 "ISOWEEK": "WEEKISO", 720 } 721 722 def quote_identifier(self, expression: E, identify: bool = True) -> E: 723 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 724 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 725 if ( 726 isinstance(expression, exp.Identifier) 727 and isinstance(expression.parent, exp.Table) 728 and expression.name.lower() == "dual" 729 ): 730 return expression # type: ignore 731 732 return super().quote_identifier(expression, identify=identify) 733 734 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 735 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 736 SINGLE_TOKENS.pop("$") 737 738 class Parser(parser.Parser): 739 IDENTIFY_PIVOT_STRINGS = True 740 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 741 COLON_IS_VARIANT_EXTRACT = True 742 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 743 744 ID_VAR_TOKENS = { 745 *parser.Parser.ID_VAR_TOKENS, 746 TokenType.EXCEPT, 747 TokenType.MATCH_CONDITION, 748 } 749 750 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 751 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 752 753 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 754 755 FUNCTIONS = { 756 **parser.Parser.FUNCTIONS, 757 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 758 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 759 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 760 this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False 761 ), 762 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 763 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 764 start=seq_get(args, 0), 765 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 766 step=seq_get(args, 2), 767 ), 768 "ARRAY_SORT": exp.SortArray.from_arg_list, 769 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 770 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 771 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 772 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 773 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 774 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 775 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 776 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 777 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 778 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 779 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 780 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 781 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 782 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 783 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 784 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 785 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 786 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 787 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 788 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 789 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 790 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 791 "DATE_TRUNC": _date_trunc_to_time, 792 "DATEADD": _build_date_time_add(exp.DateAdd), 793 "DATEDIFF": _build_datediff, 794 "DAYOFWEEKISO": 
exp.DayOfWeekIso.from_arg_list, 795 "DIV0": _build_if_from_div0, 796 "DIV0NULL": _build_if_from_div0null, 797 "EDITDISTANCE": lambda args: exp.Levenshtein( 798 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 799 ), 800 "FLATTEN": exp.Explode.from_arg_list, 801 "GET": exp.GetExtract.from_arg_list, 802 "GET_PATH": lambda args, dialect: exp.JSONExtract( 803 this=seq_get(args, 0), 804 expression=dialect.to_json_path(seq_get(args, 1)), 805 requires_json=True, 806 ), 807 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 808 "IFF": exp.If.from_arg_list, 809 "MD5_HEX": exp.MD5.from_arg_list, 810 "MD5_BINARY": exp.MD5Digest.from_arg_list, 811 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 812 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 813 "LAST_DAY": lambda args: exp.LastDay( 814 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 815 ), 816 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 817 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 818 "NULLIFZERO": _build_if_from_nullifzero, 819 "OBJECT_CONSTRUCT": _build_object_construct, 820 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 821 "PARSE_URL": lambda args: exp.ParseUrl( 822 this=seq_get(args, 0), permissive=seq_get(args, 1) 823 ), 824 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 825 "REGEXP_REPLACE": _build_regexp_replace, 826 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 827 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 828 "REPLACE": build_replace_with_optional_replacement, 829 "RLIKE": exp.RegexpLike.from_arg_list, 830 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 831 "SHA1_HEX": exp.SHA.from_arg_list, 832 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 833 "SHA2_HEX": exp.SHA2.from_arg_list, 834 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 835 "STRTOK": _build_strtok, 836 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 837 "TIMEADD": _build_date_time_add(exp.TimeAdd), 838 "TIMEDIFF": _build_datediff, 839 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 840 "TIMESTAMPDIFF": _build_datediff, 841 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 842 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 843 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 844 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 845 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 846 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 847 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 848 "TRY_TO_TIMESTAMP": _build_datetime( 849 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 850 ), 851 "TO_CHAR": build_timetostr_or_tochar, 852 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 853 "TO_NUMBER": lambda args: exp.ToNumber( 854 this=seq_get(args, 0), 855 format=seq_get(args, 1), 856 precision=seq_get(args, 2), 857 scale=seq_get(args, 3), 858 ), 859 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 860 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 861 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 862 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 863 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 864 "TO_VARCHAR": build_timetostr_or_tochar, 865 "TO_JSON": 
exp.JSONFormat.from_arg_list, 866 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 867 "ZEROIFNULL": _build_if_from_zeroifnull, 868 "LIKE": _build_like(exp.Like), 869 "ILIKE": _build_like(exp.ILike), 870 "SEARCH": _build_search, 871 } 872 FUNCTIONS.pop("PREDICT") 873 874 FUNCTION_PARSERS = { 875 **parser.Parser.FUNCTION_PARSERS, 876 "DATE_PART": lambda self: self._parse_date_part(), 877 "DIRECTORY": lambda self: self._parse_directory(), 878 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 879 "LISTAGG": lambda self: self._parse_string_agg(), 880 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 881 } 882 FUNCTION_PARSERS.pop("TRIM") 883 884 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 885 886 ALTER_PARSERS = { 887 **parser.Parser.ALTER_PARSERS, 888 "SESSION": lambda self: self._parse_alter_session(), 889 "UNSET": lambda self: self.expression( 890 exp.Set, 891 tag=self._match_text_seq("TAG"), 892 expressions=self._parse_csv(self._parse_id_var), 893 unset=True, 894 ), 895 } 896 897 STATEMENT_PARSERS = { 898 **parser.Parser.STATEMENT_PARSERS, 899 TokenType.GET: lambda self: self._parse_get(), 900 TokenType.PUT: lambda self: self._parse_put(), 901 TokenType.SHOW: lambda self: self._parse_show(), 902 } 903 904 PROPERTY_PARSERS = { 905 **parser.Parser.PROPERTY_PARSERS, 906 "CREDENTIALS": lambda self: self._parse_credentials_property(), 907 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 908 "LOCATION": lambda self: self._parse_location_property(), 909 "TAG": lambda self: self._parse_tag(), 910 "USING": lambda self: self._match_text_seq("TEMPLATE") 911 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 912 } 913 914 TYPE_CONVERTERS = { 915 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 916 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 917 } 918 919 SHOW_PARSERS = { 920 "DATABASES": _show_parser("DATABASES"), 921 "TERSE DATABASES": _show_parser("DATABASES"), 922 "SCHEMAS": _show_parser("SCHEMAS"), 923 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 924 "OBJECTS": _show_parser("OBJECTS"), 925 "TERSE OBJECTS": _show_parser("OBJECTS"), 926 "TABLES": _show_parser("TABLES"), 927 "TERSE TABLES": _show_parser("TABLES"), 928 "VIEWS": _show_parser("VIEWS"), 929 "TERSE VIEWS": _show_parser("VIEWS"), 930 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 931 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 932 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 933 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 934 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 935 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 936 "SEQUENCES": _show_parser("SEQUENCES"), 937 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 938 "STAGES": _show_parser("STAGES"), 939 "COLUMNS": _show_parser("COLUMNS"), 940 "USERS": _show_parser("USERS"), 941 "TERSE USERS": _show_parser("USERS"), 942 "FILE FORMATS": _show_parser("FILE FORMATS"), 943 "FUNCTIONS": _show_parser("FUNCTIONS"), 944 "PROCEDURES": _show_parser("PROCEDURES"), 945 "WAREHOUSES": _show_parser("WAREHOUSES"), 946 } 947 948 CONSTRAINT_PARSERS = { 949 **parser.Parser.CONSTRAINT_PARSERS, 950 "WITH": lambda self: self._parse_with_constraint(), 951 "MASKING": lambda self: self._parse_with_constraint(), 952 "PROJECTION": lambda self: self._parse_with_constraint(), 953 "TAG": lambda self: self._parse_with_constraint(), 954 } 955 956 STAGED_FILE_SINGLE_TOKENS = { 957 TokenType.DOT, 958 TokenType.MOD, 959 TokenType.SLASH, 960 } 961 962 
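# Illustrative sketch (not part of the original source; output shapes are
# indicative): the SHOW_PARSERS table above routes SHOW statements into
# exp.Show through _parse_show_snowflake, which is defined further below.
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("SHOW TERSE TABLES IN db1.schema1", read="snowflake")
#     # ast should be an exp.Show with terse=True and scope/scope_kind set;
#     # Generator.show_sql later renders it back into a SHOW statement.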
FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 963 964 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 965 966 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 967 968 LAMBDAS = { 969 **parser.Parser.LAMBDAS, 970 TokenType.ARROW: lambda self, expressions: self.expression( 971 exp.Lambda, 972 this=self._replace_lambda( 973 self._parse_assignment(), 974 expressions, 975 ), 976 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 977 ), 978 } 979 980 COLUMN_OPERATORS = { 981 **parser.Parser.COLUMN_OPERATORS, 982 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 983 exp.ModelAttribute, this=this, expression=attr 984 ), 985 } 986 987 def _parse_directory(self) -> exp.DirectoryStage: 988 table = self._parse_table_parts() 989 990 if isinstance(table, exp.Table): 991 table = table.this 992 993 return self.expression(exp.DirectoryStage, this=table) 994 995 def _parse_use(self) -> exp.Use: 996 if self._match_text_seq("SECONDARY", "ROLES"): 997 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 998 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 999 return self.expression( 1000 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 1001 ) 1002 1003 return super()._parse_use() 1004 1005 def _negate_range( 1006 self, this: t.Optional[exp.Expression] = None 1007 ) -> t.Optional[exp.Expression]: 1008 if not this: 1009 return this 1010 1011 query = this.args.get("query") 1012 if isinstance(this, exp.In) and isinstance(query, exp.Query): 1013 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 1014 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 1015 # which can produce different results (most likely a SnowFlake bug). 
1016 # 1017 # https://docs.snowflake.com/en/sql-reference/functions/in 1018 # Context: https://github.com/tobymao/sqlglot/issues/3890 1019 return self.expression( 1020 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 1021 ) 1022 1023 return self.expression(exp.Not, this=this) 1024 1025 def _parse_tag(self) -> exp.Tags: 1026 return self.expression( 1027 exp.Tags, 1028 expressions=self._parse_wrapped_csv(self._parse_property), 1029 ) 1030 1031 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 1032 if self._prev.token_type != TokenType.WITH: 1033 self._retreat(self._index - 1) 1034 1035 if self._match_text_seq("MASKING", "POLICY"): 1036 policy = self._parse_column() 1037 return self.expression( 1038 exp.MaskingPolicyColumnConstraint, 1039 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1040 expressions=self._match(TokenType.USING) 1041 and self._parse_wrapped_csv(self._parse_id_var), 1042 ) 1043 if self._match_text_seq("PROJECTION", "POLICY"): 1044 policy = self._parse_column() 1045 return self.expression( 1046 exp.ProjectionPolicyColumnConstraint, 1047 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1048 ) 1049 if self._match(TokenType.TAG): 1050 return self._parse_tag() 1051 1052 return None 1053 1054 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1055 if self._match(TokenType.TAG): 1056 return self._parse_tag() 1057 1058 return super()._parse_with_property() 1059 1060 def _parse_create(self) -> exp.Create | exp.Command: 1061 expression = super()._parse_create() 1062 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 1063 # Replace the Table node with the enclosed Identifier 1064 expression.this.replace(expression.this.this) 1065 1066 return expression 1067 1068 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 1069 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 1070 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 1071 this = self._parse_var() or self._parse_type() 1072 1073 if not this: 1074 return None 1075 1076 self._match(TokenType.COMMA) 1077 expression = self._parse_bitwise() 1078 this = map_date_part(this) 1079 name = this.name.upper() 1080 1081 if name.startswith("EPOCH"): 1082 if name == "EPOCH_MILLISECOND": 1083 scale = 10**3 1084 elif name == "EPOCH_MICROSECOND": 1085 scale = 10**6 1086 elif name == "EPOCH_NANOSECOND": 1087 scale = 10**9 1088 else: 1089 scale = None 1090 1091 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 1092 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 1093 1094 if scale: 1095 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 1096 1097 return to_unix 1098 1099 return self.expression(exp.Extract, this=this, expression=expression) 1100 1101 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1102 if is_map: 1103 # Keys are strings in Snowflake's objects, see also: 1104 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1105 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1106 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1107 1108 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1109 1110 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1111 lateral = super()._parse_lateral() 1112 if 
not lateral: 1113 return lateral 1114 1115 if isinstance(lateral.this, exp.Explode): 1116 table_alias = lateral.args.get("alias") 1117 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1118 if table_alias and not table_alias.args.get("columns"): 1119 table_alias.set("columns", columns) 1120 elif not table_alias: 1121 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1122 1123 return lateral 1124 1125 def _parse_table_parts( 1126 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1127 ) -> exp.Table: 1128 # https://docs.snowflake.com/en/user-guide/querying-stage 1129 if self._match(TokenType.STRING, advance=False): 1130 table = self._parse_string() 1131 elif self._match_text_seq("@", advance=False): 1132 table = self._parse_location_path() 1133 else: 1134 table = None 1135 1136 if table: 1137 file_format = None 1138 pattern = None 1139 1140 wrapped = self._match(TokenType.L_PAREN) 1141 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1142 if self._match_text_seq("FILE_FORMAT", "=>"): 1143 file_format = self._parse_string() or super()._parse_table_parts( 1144 is_db_reference=is_db_reference 1145 ) 1146 elif self._match_text_seq("PATTERN", "=>"): 1147 pattern = self._parse_string() 1148 else: 1149 break 1150 1151 self._match(TokenType.COMMA) 1152 1153 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1154 else: 1155 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1156 1157 return table 1158 1159 def _parse_table( 1160 self, 1161 schema: bool = False, 1162 joins: bool = False, 1163 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1164 parse_bracket: bool = False, 1165 is_db_reference: bool = False, 1166 parse_partition: bool = False, 1167 consume_pipe: bool = False, 1168 ) -> t.Optional[exp.Expression]: 1169 table = super()._parse_table( 1170 schema=schema, 1171 joins=joins, 1172 alias_tokens=alias_tokens, 1173 parse_bracket=parse_bracket, 1174 is_db_reference=is_db_reference, 1175 parse_partition=parse_partition, 1176 ) 1177 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1178 table_from_rows = table.this 1179 for arg in exp.TableFromRows.arg_types: 1180 if arg != "this": 1181 table_from_rows.set(arg, table.args.get(arg)) 1182 1183 table = table_from_rows 1184 1185 return table 1186 1187 def _parse_id_var( 1188 self, 1189 any_token: bool = True, 1190 tokens: t.Optional[t.Collection[TokenType]] = None, 1191 ) -> t.Optional[exp.Expression]: 1192 if self._match_text_seq("IDENTIFIER", "("): 1193 identifier = ( 1194 super()._parse_id_var(any_token=any_token, tokens=tokens) 1195 or self._parse_string() 1196 ) 1197 self._match_r_paren() 1198 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1199 1200 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1201 1202 def _parse_show_snowflake(self, this: str) -> exp.Show: 1203 scope = None 1204 scope_kind = None 1205 1206 # This will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS, 1207 # which is syntactically valid but has no effect on the output 1208 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1209 1210 history = self._match_text_seq("HISTORY") 1211 1212 like = self._parse_string() if self._match(TokenType.LIKE) else None 1213 1214 if self._match(TokenType.IN): 1215 if self._match_text_seq("ACCOUNT"): 1216 scope_kind = "ACCOUNT" 1217 elif self._match_text_seq("CLASS"): 1218 scope_kind = "CLASS" 1219
scope = self._parse_table_parts() 1220 elif self._match_text_seq("APPLICATION"): 1221 scope_kind = "APPLICATION" 1222 if self._match_text_seq("PACKAGE"): 1223 scope_kind += " PACKAGE" 1224 scope = self._parse_table_parts() 1225 elif self._match_set(self.DB_CREATABLES): 1226 scope_kind = self._prev.text.upper() 1227 if self._curr: 1228 scope = self._parse_table_parts() 1229 elif self._curr: 1230 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1231 scope = self._parse_table_parts() 1232 1233 return self.expression( 1234 exp.Show, 1235 **{ 1236 "terse": terse, 1237 "this": this, 1238 "history": history, 1239 "like": like, 1240 "scope": scope, 1241 "scope_kind": scope_kind, 1242 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1243 "limit": self._parse_limit(), 1244 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1245 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1246 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1247 }, 1248 ) 1249 1250 def _parse_put(self) -> exp.Put | exp.Command: 1251 if self._curr.token_type != TokenType.STRING: 1252 return self._parse_as_command(self._prev) 1253 1254 return self.expression( 1255 exp.Put, 1256 this=self._parse_string(), 1257 target=self._parse_location_path(), 1258 properties=self._parse_properties(), 1259 ) 1260 1261 def _parse_get(self) -> t.Optional[exp.Expression]: 1262 start = self._prev 1263 1264 # If we detect GET( then we need to parse a function, not a statement 1265 if self._match(TokenType.L_PAREN): 1266 self._retreat(self._index - 2) 1267 return self._parse_expression() 1268 1269 target = self._parse_location_path() 1270 1271 # Parse as command if unquoted file path 1272 if self._curr.token_type == TokenType.URI_START: 1273 return self._parse_as_command(start) 1274 1275 return self.expression( 1276 exp.Get, 1277 this=self._parse_string(), 1278 target=target, 1279 properties=self._parse_properties(), 1280 ) 1281 1282 def _parse_location_property(self) -> exp.LocationProperty: 1283 self._match(TokenType.EQ) 1284 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1285 1286 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1287 # Parse either a subquery or a staged file 1288 return ( 1289 self._parse_select(table=True, parse_subquery_alias=False) 1290 if self._match(TokenType.L_PAREN, advance=False) 1291 else self._parse_table_parts() 1292 ) 1293 1294 def _parse_location_path(self) -> exp.Var: 1295 start = self._curr 1296 self._advance_any(ignore_reserved=True) 1297 1298 # We avoid consuming a comma token because external tables like @foo and @bar 1299 # can be joined in a query with a comma separator, as well as closing paren 1300 # in case of subqueries 1301 while self._is_connected() and not self._match_set( 1302 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1303 ): 1304 self._advance_any(ignore_reserved=True) 1305 1306 return exp.var(self._find_sql(start, self._prev)) 1307 1308 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1309 this = super()._parse_lambda_arg() 1310 1311 if not this: 1312 return this 1313 1314 typ = self._parse_types() 1315 1316 if typ: 1317 return self.expression(exp.Cast, this=this, to=typ) 1318 1319 return this 1320 1321 def _parse_foreign_key(self) -> exp.ForeignKey: 1322 # inlineFK, the REFERENCES columns are implied 1323 if self._match(TokenType.REFERENCES, advance=False): 1324 return self.expression(exp.ForeignKey) 1325 1326 # 
outoflineFK, explicitly names the columns 1327 return super()._parse_foreign_key() 1328 1329 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1330 self._match(TokenType.EQ) 1331 if self._match(TokenType.L_PAREN, advance=False): 1332 expressions = self._parse_wrapped_options() 1333 else: 1334 expressions = [self._parse_format_name()] 1335 1336 return self.expression( 1337 exp.FileFormatProperty, 1338 expressions=expressions, 1339 ) 1340 1341 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1342 return self.expression( 1343 exp.CredentialsProperty, 1344 expressions=self._parse_wrapped_options(), 1345 ) 1346 1347 def _parse_semantic_view(self) -> exp.SemanticView: 1348 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1349 1350 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1351 if self._match_text_seq("DIMENSIONS"): 1352 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1353 if self._match_text_seq("METRICS"): 1354 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1355 if self._match_text_seq("WHERE"): 1356 kwargs["where"] = self._parse_expression() 1357 1358 return self.expression(exp.SemanticView, **kwargs) 1359 1360 class Tokenizer(tokens.Tokenizer): 1361 STRING_ESCAPES = ["\\", "'"] 1362 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1363 RAW_STRINGS = ["$$"] 1364 COMMENTS = ["--", "//", ("/*", "*/")] 1365 NESTED_COMMENTS = False 1366 1367 KEYWORDS = { 1368 **tokens.Tokenizer.KEYWORDS, 1369 "BYTEINT": TokenType.INT, 1370 "FILE://": TokenType.URI_START, 1371 "FILE FORMAT": TokenType.FILE_FORMAT, 1372 "GET": TokenType.GET, 1373 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1374 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1375 "MINUS": TokenType.EXCEPT, 1376 "NCHAR VARYING": TokenType.VARCHAR, 1377 "PUT": TokenType.PUT, 1378 "REMOVE": TokenType.COMMAND, 1379 "RM": TokenType.COMMAND, 1380 "SAMPLE": TokenType.TABLE_SAMPLE, 1381 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1382 "SQL_DOUBLE": TokenType.DOUBLE, 1383 "SQL_VARCHAR": TokenType.VARCHAR, 1384 "STAGE": TokenType.STAGE, 1385 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1386 "STREAMLIT": TokenType.STREAMLIT, 1387 "TAG": TokenType.TAG, 1388 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1389 "TOP": TokenType.TOP, 1390 "WAREHOUSE": TokenType.WAREHOUSE, 1391 } 1392 KEYWORDS.pop("/*+") 1393 1394 SINGLE_TOKENS = { 1395 **tokens.Tokenizer.SINGLE_TOKENS, 1396 "$": TokenType.PARAMETER, 1397 "!": TokenType.EXCLAMATION, 1398 } 1399 1400 VAR_SINGLE_TOKENS = {"$"} 1401 1402 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1403 1404 class Generator(generator.Generator): 1405 PARAMETER_TOKEN = "$" 1406 MATCHED_BY_SOURCE = False 1407 SINGLE_STRING_INTERVAL = True 1408 JOIN_HINTS = False 1409 TABLE_HINTS = False 1410 QUERY_HINTS = False 1411 AGGREGATE_FILTER_SUPPORTED = False 1412 SUPPORTS_TABLE_COPY = False 1413 COLLATE_IS_FUNC = True 1414 LIMIT_ONLY_LITERALS = True 1415 JSON_KEY_VALUE_PAIR_SEP = "," 1416 INSERT_OVERWRITE = " OVERWRITE INTO" 1417 STRUCT_DELIMITER = ("(", ")") 1418 COPY_PARAMS_ARE_WRAPPED = False 1419 COPY_PARAMS_EQ_REQUIRED = True 1420 STAR_EXCEPT = "EXCLUDE" 1421 SUPPORTS_EXPLODING_PROJECTIONS = False 1422 ARRAY_CONCAT_IS_VAR_LEN = False 1423 SUPPORTS_CONVERT_TIMEZONE = True 1424 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1425 SUPPORTS_MEDIAN = True 1426 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1427 SUPPORTS_DECODE_CASE = True 1428 IS_BOOL_ALLOWED = False 1429 1430 TRANSFORMS = { 1431 **generator.Generator.TRANSFORMS, 1432 exp.ApproxDistinct: 
rename_func("APPROX_COUNT_DISTINCT"), 1433 exp.ArgMax: rename_func("MAX_BY"), 1434 exp.ArgMin: rename_func("MIN_BY"), 1435 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1436 exp.ArrayContains: lambda self, e: self.func( 1437 "ARRAY_CONTAINS", 1438 e.expression 1439 if e.args.get("ensure_variant") is False 1440 else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False), 1441 e.this, 1442 ), 1443 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1444 exp.AtTimeZone: lambda self, e: self.func( 1445 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1446 ), 1447 exp.BitwiseOr: rename_func("BITOR"), 1448 exp.BitwiseXor: rename_func("BITXOR"), 1449 exp.BitwiseAnd: rename_func("BITAND"), 1450 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1451 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1452 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1453 exp.BitwiseNot: rename_func("BITNOT"), 1454 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1455 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1456 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1457 exp.DateAdd: date_delta_sql("DATEADD"), 1458 exp.DateDiff: date_delta_sql("DATEDIFF"), 1459 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1460 exp.DatetimeDiff: timestampdiff_sql, 1461 exp.DateStrToDate: datestrtodate_sql, 1462 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1463 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1464 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1465 exp.DayOfYear: rename_func("DAYOFYEAR"), 1466 exp.Explode: rename_func("FLATTEN"), 1467 exp.Extract: lambda self, e: self.func( 1468 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1469 ), 1470 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1471 exp.FileFormatProperty: lambda self, 1472 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1473 exp.FromTimeZone: lambda self, e: self.func( 1474 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1475 ), 1476 exp.GenerateSeries: lambda self, e: self.func( 1477 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1478 ), 1479 exp.GetExtract: rename_func("GET"), 1480 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1481 exp.If: if_sql(name="IFF", false_value="NULL"), 1482 exp.JSONExtractArray: _json_extract_value_array_sql, 1483 exp.JSONExtractScalar: lambda self, e: self.func( 1484 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1485 ), 1486 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1487 exp.JSONPathRoot: lambda *_: "", 1488 exp.JSONValueArray: _json_extract_value_array_sql, 1489 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1490 rename_func("EDITDISTANCE") 1491 ), 1492 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1493 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1494 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1495 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1496 exp.MakeInterval: no_make_interval_sql, 1497 exp.Max: max_or_greatest, 1498 exp.Min: min_or_least, 1499 exp.ParseJSON: lambda self, e: self.func( 1500 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1501 ), 1502 exp.JSONFormat: rename_func("TO_JSON"), 1503 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1504 exp.PercentileCont: transforms.preprocess( 1505 [transforms.add_within_group_for_percentiles] 1506 ), 1507 exp.PercentileDisc: transforms.preprocess( 
1508 [transforms.add_within_group_for_percentiles] 1509 ), 1510 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1511 exp.RegexpExtract: _regexpextract_sql, 1512 exp.RegexpExtractAll: _regexpextract_sql, 1513 exp.RegexpILike: _regexpilike_sql, 1514 exp.Rand: rename_func("RANDOM"), 1515 exp.Select: transforms.preprocess( 1516 [ 1517 transforms.eliminate_window_clause, 1518 transforms.eliminate_distinct_on, 1519 transforms.explode_projection_to_unnest(), 1520 transforms.eliminate_semi_and_anti_joins, 1521 _transform_generate_date_array, 1522 _qualify_unnested_columns, 1523 _eliminate_dot_variant_lookup, 1524 ] 1525 ), 1526 exp.SHA: rename_func("SHA1"), 1527 exp.MD5Digest: rename_func("MD5_BINARY"), 1528 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1529 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1530 exp.LowerHex: rename_func("TO_CHAR"), 1531 exp.SortArray: rename_func("ARRAY_SORT"), 1532 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1533 exp.StartsWith: rename_func("STARTSWITH"), 1534 exp.EndsWith: rename_func("ENDSWITH"), 1535 exp.StrPosition: lambda self, e: strposition_sql( 1536 self, e, func_name="CHARINDEX", supports_position=True 1537 ), 1538 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1539 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1540 exp.Stuff: rename_func("INSERT"), 1541 exp.StPoint: rename_func("ST_MAKEPOINT"), 1542 exp.TimeAdd: date_delta_sql("TIMEADD"), 1543 exp.Timestamp: no_timestamp_sql, 1544 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1545 exp.TimestampDiff: lambda self, e: self.func( 1546 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1547 ), 1548 exp.TimestampTrunc: timestamptrunc_sql(), 1549 exp.TimeStrToTime: timestrtotime_sql, 1550 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1551 exp.ToArray: rename_func("TO_ARRAY"), 1552 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1553 exp.ToDouble: rename_func("TO_DOUBLE"), 1554 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1555 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1556 exp.TsOrDsToDate: lambda self, e: self.func( 1557 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1558 ), 1559 exp.TsOrDsToTime: lambda self, e: self.func( 1560 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1561 ), 1562 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1563 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1564 exp.Uuid: rename_func("UUID_STRING"), 1565 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1566 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1567 exp.Xor: rename_func("BOOLXOR"), 1568 exp.ByteLength: rename_func("OCTET_LENGTH"), 1569 } 1570 1571 SUPPORTED_JSON_PATH_PARTS = { 1572 exp.JSONPathKey, 1573 exp.JSONPathRoot, 1574 exp.JSONPathSubscript, 1575 } 1576 1577 TYPE_MAPPING = { 1578 **generator.Generator.TYPE_MAPPING, 1579 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1580 exp.DataType.Type.NESTED: "OBJECT", 1581 exp.DataType.Type.STRUCT: "OBJECT", 1582 exp.DataType.Type.TEXT: "VARCHAR", 1583 } 1584 1585 TOKEN_MAPPING = { 1586 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1587 } 1588 1589 PROPERTIES_LOCATION = { 1590 **generator.Generator.PROPERTIES_LOCATION, 1591 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1592 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1593 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1594 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1595 
exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1596 } 1597 1598 UNSUPPORTED_VALUES_EXPRESSIONS = { 1599 exp.Map, 1600 exp.StarMap, 1601 exp.Struct, 1602 exp.VarMap, 1603 } 1604 1605 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1606 1607 def with_properties(self, properties: exp.Properties) -> str: 1608 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1609 1610 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1611 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1612 values_as_table = False 1613 1614 return super().values_sql(expression, values_as_table=values_as_table) 1615 1616 def datatype_sql(self, expression: exp.DataType) -> str: 1617 expressions = expression.expressions 1618 if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES): 1619 for field_type in expressions: 1620 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 1621 if isinstance(field_type, exp.DataType): 1622 return "OBJECT" 1623 if ( 1624 isinstance(field_type, exp.ColumnDef) 1625 and field_type.this 1626 and field_type.this.is_string 1627 ): 1628 # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides 1629 # converting 'foo' into an identifier, we also need to quote it because these 1630 # keys are case-sensitive. For example: 1631 # 1632 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1633 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1634 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1635 1636 return super().datatype_sql(expression) 1637 1638 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1639 return self.func( 1640 "TO_NUMBER", 1641 expression.this, 1642 expression.args.get("format"), 1643 expression.args.get("precision"), 1644 expression.args.get("scale"), 1645 ) 1646 1647 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1648 milli = expression.args.get("milli") 1649 if milli is not None: 1650 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1651 expression.set("nano", milli_to_nano) 1652 1653 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1654 1655 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1656 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1657 return self.func("TO_GEOGRAPHY", expression.this) 1658 if expression.is_type(exp.DataType.Type.GEOMETRY): 1659 return self.func("TO_GEOMETRY", expression.this) 1660 1661 return super().cast_sql(expression, safe_prefix=safe_prefix) 1662 1663 def trycast_sql(self, expression: exp.TryCast) -> str: 1664 value = expression.this 1665 1666 if value.type is None: 1667 from sqlglot.optimizer.annotate_types import annotate_types 1668 1669 value = annotate_types(value, dialect=self.dialect) 1670 1671 # Snowflake requires that TRY_CAST's value be a string 1672 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1673 # if we can deduce that the value is a string, then we can generate TRY_CAST 1674 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1675 return super().trycast_sql(expression) 1676 1677 return self.cast_sql(expression) 1678 1679 def log_sql(self, expression: exp.Log) -> str: 1680 if not expression.expression: 1681 return self.func("LN", expression.this) 1682 1683 return super().log_sql(expression) 1684 1685 def
unnest_sql(self, expression: exp.Unnest) -> str: 1686 unnest_alias = expression.args.get("alias") 1687 offset = expression.args.get("offset") 1688 1689 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1690 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1691 1692 columns = [ 1693 exp.to_identifier("seq"), 1694 exp.to_identifier("key"), 1695 exp.to_identifier("path"), 1696 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1697 value, 1698 exp.to_identifier("this"), 1699 ] 1700 1701 if unnest_alias: 1702 unnest_alias.set("columns", columns) 1703 else: 1704 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1705 1706 table_input = self.sql(expression.expressions[0]) 1707 if not table_input.startswith("INPUT =>"): 1708 table_input = f"INPUT => {table_input}" 1709 1710 expression_parent = expression.parent 1711 1712 explode = ( 1713 f"FLATTEN({table_input})" 1714 if isinstance(expression_parent, exp.Lateral) 1715 else f"TABLE(FLATTEN({table_input}))" 1716 ) 1717 alias = self.sql(unnest_alias) 1718 alias = f" AS {alias}" if alias else "" 1719 value = ( 1720 "" 1721 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1722 else f"{value} FROM " 1723 ) 1724 1725 return f"{value}{explode}{alias}" 1726 1727 def show_sql(self, expression: exp.Show) -> str: 1728 terse = "TERSE " if expression.args.get("terse") else "" 1729 history = " HISTORY" if expression.args.get("history") else "" 1730 like = self.sql(expression, "like") 1731 like = f" LIKE {like}" if like else "" 1732 1733 scope = self.sql(expression, "scope") 1734 scope = f" {scope}" if scope else "" 1735 1736 scope_kind = self.sql(expression, "scope_kind") 1737 if scope_kind: 1738 scope_kind = f" IN {scope_kind}" 1739 1740 starts_with = self.sql(expression, "starts_with") 1741 if starts_with: 1742 starts_with = f" STARTS WITH {starts_with}" 1743 1744 limit = self.sql(expression, "limit") 1745 1746 from_ = self.sql(expression, "from") 1747 if from_: 1748 from_ = f" FROM {from_}" 1749 1750 privileges = self.expressions(expression, key="privileges", flat=True) 1751 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1752 1753 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1754 1755 def describe_sql(self, expression: exp.Describe) -> str: 1756 # Default to table if kind is unknown 1757 kind_value = expression.args.get("kind") or "TABLE" 1758 kind = f" {kind_value}" if kind_value else "" 1759 this = f" {self.sql(expression, 'this')}" 1760 expressions = self.expressions(expression, flat=True) 1761 expressions = f" {expressions}" if expressions else "" 1762 return f"DESCRIBE{kind}{this}{expressions}" 1763 1764 def generatedasidentitycolumnconstraint_sql( 1765 self, expression: exp.GeneratedAsIdentityColumnConstraint 1766 ) -> str: 1767 start = expression.args.get("start") 1768 start = f" START {start}" if start else "" 1769 increment = expression.args.get("increment") 1770 increment = f" INCREMENT {increment}" if increment else "" 1771 1772 order = expression.args.get("order") 1773 if order is not None: 1774 order_clause = " ORDER" if order else " NOORDER" 1775 else: 1776 order_clause = "" 1777 1778 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1779 1780 def cluster_sql(self, expression: exp.Cluster) -> str: 1781 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1782 1783 def struct_sql(self, expression: exp.Struct) -> str: 1784 if 
len(expression.expressions) == 1: 1785 arg = expression.expressions[0] 1786 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1787 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1788 return f"{{{self.sql(expression.expressions[0])}}}" 1789 1790 keys = [] 1791 values = [] 1792 1793 for i, e in enumerate(expression.expressions): 1794 if isinstance(e, exp.PropertyEQ): 1795 keys.append( 1796 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1797 ) 1798 values.append(e.expression) 1799 else: 1800 keys.append(exp.Literal.string(f"_{i}")) 1801 values.append(e) 1802 1803 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1804 1805 @unsupported_args("weight", "accuracy") 1806 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1807 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1808 1809 def alterset_sql(self, expression: exp.AlterSet) -> str: 1810 exprs = self.expressions(expression, flat=True) 1811 exprs = f" {exprs}" if exprs else "" 1812 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1813 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1814 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1815 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1816 tag = self.expressions(expression, key="tag", flat=True) 1817 tag = f" TAG {tag}" if tag else "" 1818 1819 return f"SET{exprs}{file_format}{copy_options}{tag}" 1820 1821 def strtotime_sql(self, expression: exp.StrToTime): 1822 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1823 return self.func( 1824 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1825 ) 1826 1827 def timestampsub_sql(self, expression: exp.TimestampSub): 1828 return self.sql( 1829 exp.TimestampAdd( 1830 this=expression.this, 1831 expression=expression.expression * -1, 1832 unit=expression.unit, 1833 ) 1834 ) 1835 1836 def jsonextract_sql(self, expression: exp.JSONExtract): 1837 this = expression.this 1838 1839 # JSON strings are valid coming from other dialects such as BQ so 1840 # for these cases we PARSE_JSON preemptively 1841 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1842 "requires_json" 1843 ): 1844 this = exp.ParseJSON(this=this) 1845 1846 return self.func( 1847 "GET_PATH", 1848 this, 1849 expression.expression, 1850 ) 1851 1852 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1853 this = expression.this 1854 if this.is_string: 1855 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1856 1857 return self.func("TO_CHAR", this, self.format_time(expression)) 1858 1859 def datesub_sql(self, expression: exp.DateSub) -> str: 1860 value = expression.expression 1861 if value: 1862 value.replace(value * (-1)) 1863 else: 1864 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1865 1866 return date_delta_sql("DATEADD")(self, expression) 1867 1868 def select_sql(self, expression: exp.Select) -> str: 1869 limit = expression.args.get("limit") 1870 offset = expression.args.get("offset") 1871 if offset and not limit: 1872 expression.limit(exp.Null(), copy=False) 1873 return super().select_sql(expression) 1874 1875 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1876 is_materialized = expression.find(exp.MaterializedProperty) 1877 
copy_grants_property = expression.find(exp.CopyGrantsProperty) 1878 1879 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1880 # For materialized views, COPY GRANTS is located *before* the columns list 1881 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1882 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1883 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1884 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1885 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1886 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1887 1888 this_name = self.sql(expression.this, "this") 1889 copy_grants = self.sql(copy_grants_property) 1890 this_schema = self.schema_columns_sql(expression.this) 1891 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1892 1893 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1894 1895 return super().createable_sql(expression, locations) 1896 1897 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1898 this = expression.this 1899 1900 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1901 # and add it later as part of the WITHIN GROUP clause 1902 order = this if isinstance(this, exp.Order) else None 1903 if order: 1904 expression.set("this", order.this.pop()) 1905 1906 expr_sql = super().arrayagg_sql(expression) 1907 1908 if order: 1909 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1910 1911 return expr_sql 1912 1913 def array_sql(self, expression: exp.Array) -> str: 1914 expressions = expression.expressions 1915 1916 first_expr = seq_get(expressions, 0) 1917 if isinstance(first_expr, exp.Select): 1918 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1919 if first_expr.text("kind").upper() == "STRUCT": 1920 object_construct_args = [] 1921 for expr in first_expr.expressions: 1922 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1923 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1924 name = expr.this if isinstance(expr, exp.Alias) else expr 1925 1926 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1927 1928 array_agg = exp.ArrayAgg( 1929 this=_build_object_construct(args=object_construct_args) 1930 ) 1931 1932 first_expr.set("kind", None) 1933 first_expr.set("expressions", [array_agg]) 1934 1935 return self.sql(first_expr.subquery()) 1936 1937 return inline_array_sql(self, expression) 1938 1939 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1940 zone = self.sql(expression, "this") 1941 if not zone: 1942 return super().currentdate_sql(expression) 1943 1944 expr = exp.Cast( 1945 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1946 to=exp.DataType(this=exp.DataType.Type.DATE), 1947 ) 1948 return self.sql(expr) 1949 1950 def dot_sql(self, expression: exp.Dot) -> str: 1951 this = expression.this 1952 1953 if not this.type: 1954 from sqlglot.optimizer.annotate_types import annotate_types 1955 1956 this = annotate_types(this, dialect=self.dialect) 1957 1958 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1959 # Generate colon notation for the top level STRUCT 1960 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1961 1962 return 
super().dot_sql(expression) 1963 1964 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1965 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1966 1967 def format_sql(self, expression: exp.Format) -> str: 1968 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1969 return self.func("TO_CHAR", expression.expressions[0]) 1970 1971 return self.function_fallback_sql(expression) 1972 1973 def splitpart_sql(self, expression: exp.SplitPart) -> str: 1974 # Set part_index to 1 if missing 1975 if not expression.args.get("delimiter"): 1976 expression.set("delimiter", exp.Literal.string(" ")) 1977 1978 if not expression.args.get("part_index"): 1979 expression.set("part_index", exp.Literal.number(1)) 1980 1981 return rename_func("SPLIT_PART")(self, expression)
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
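A rough sketch of triggering this rewrite (hedged: the rewrite happens during qualification, not plain parsing, and this assumes sqlglot's qualify optimizer entry point):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# Under the Snowflake dialect, qualification should propagate the CTE alias `c`
# onto the SUM(a) projection, per PREFER_CTE_ALIAS_COLUMN.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))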
Associates this dialect's time formats with their equivalent Python strftime formats.
722 def quote_identifier(self, expression: E, identify: bool = True) -> E: 723 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 724 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 725 if ( 726 isinstance(expression, exp.Identifier) 727 and isinstance(expression.parent, exp.Table) 728 and expression.name.lower() == "dual" 729 ): 730 return expression # type: ignore 731 732 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
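As a minimal sketch of the DUAL carve-out above (expected output hedged, it may vary by sqlglot version):

import sqlglot

# Even with identify=True, the special DUAL table reference is left unquoted
print(sqlglot.transpile("SELECT 1 FROM dual", read="snowflake", write="snowflake", identify=True)[0])
# Expected: SELECT 1 FROM dual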
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (the corresponding literal character).
734 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 735 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 736 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
738 class Parser(parser.Parser): 739 IDENTIFY_PIVOT_STRINGS = True 740 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 741 COLON_IS_VARIANT_EXTRACT = True 742 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 743 744 ID_VAR_TOKENS = { 745 *parser.Parser.ID_VAR_TOKENS, 746 TokenType.EXCEPT, 747 TokenType.MATCH_CONDITION, 748 } 749 750 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 751 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 752 753 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 754 755 FUNCTIONS = { 756 **parser.Parser.FUNCTIONS, 757 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 758 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 759 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 760 this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False 761 ), 762 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 763 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 764 start=seq_get(args, 0), 765 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 766 step=seq_get(args, 2), 767 ), 768 "ARRAY_SORT": exp.SortArray.from_arg_list, 769 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 770 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 771 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 772 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 773 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 774 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 775 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 776 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 777 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 778 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 779 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 780 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 781 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 782 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 783 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 784 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 785 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 786 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 787 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 788 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 789 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 790 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 791 "DATE_TRUNC": _date_trunc_to_time, 792 "DATEADD": _build_date_time_add(exp.DateAdd), 793 "DATEDIFF": _build_datediff, 794 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 795 "DIV0": _build_if_from_div0, 796 "DIV0NULL": _build_if_from_div0null, 797 "EDITDISTANCE": lambda args: exp.Levenshtein( 798 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 799 ), 800 "FLATTEN": exp.Explode.from_arg_list, 801 "GET": exp.GetExtract.from_arg_list, 802 "GET_PATH": lambda args, dialect: exp.JSONExtract( 803 this=seq_get(args, 0), 804 expression=dialect.to_json_path(seq_get(args, 1)), 805 requires_json=True, 806 ), 807 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 808 "IFF": exp.If.from_arg_list, 809 "MD5_HEX": exp.MD5.from_arg_list, 810 "MD5_BINARY": exp.MD5Digest.from_arg_list, 811 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 812 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 813 "LAST_DAY": lambda args: exp.LastDay( 814 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 815 ), 816 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 817 "LENGTH": lambda args: 
exp.Length(this=seq_get(args, 0), binary=True), 818 "NULLIFZERO": _build_if_from_nullifzero, 819 "OBJECT_CONSTRUCT": _build_object_construct, 820 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 821 "PARSE_URL": lambda args: exp.ParseUrl( 822 this=seq_get(args, 0), permissive=seq_get(args, 1) 823 ), 824 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 825 "REGEXP_REPLACE": _build_regexp_replace, 826 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 827 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 828 "REPLACE": build_replace_with_optional_replacement, 829 "RLIKE": exp.RegexpLike.from_arg_list, 830 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 831 "SHA1_HEX": exp.SHA.from_arg_list, 832 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 833 "SHA2_HEX": exp.SHA2.from_arg_list, 834 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 835 "STRTOK": _build_strtok, 836 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 837 "TIMEADD": _build_date_time_add(exp.TimeAdd), 838 "TIMEDIFF": _build_datediff, 839 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 840 "TIMESTAMPDIFF": _build_datediff, 841 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 842 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 843 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 844 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 845 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 846 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 847 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 848 "TRY_TO_TIMESTAMP": _build_datetime( 849 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 850 ), 851 "TO_CHAR": build_timetostr_or_tochar, 852 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 853 "TO_NUMBER": lambda args: exp.ToNumber( 854 this=seq_get(args, 0), 855 format=seq_get(args, 1), 856 precision=seq_get(args, 2), 857 scale=seq_get(args, 3), 858 ), 859 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 860 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 861 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 862 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 863 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 864 "TO_VARCHAR": build_timetostr_or_tochar, 865 "TO_JSON": exp.JSONFormat.from_arg_list, 866 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 867 "ZEROIFNULL": _build_if_from_zeroifnull, 868 "LIKE": _build_like(exp.Like), 869 "ILIKE": _build_like(exp.ILike), 870 "SEARCH": _build_search, 871 } 872 FUNCTIONS.pop("PREDICT") 873 874 FUNCTION_PARSERS = { 875 **parser.Parser.FUNCTION_PARSERS, 876 "DATE_PART": lambda self: self._parse_date_part(), 877 "DIRECTORY": lambda self: self._parse_directory(), 878 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 879 "LISTAGG": lambda self: self._parse_string_agg(), 880 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 881 } 882 FUNCTION_PARSERS.pop("TRIM") 883 884 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 885 886 ALTER_PARSERS = { 887 **parser.Parser.ALTER_PARSERS, 888 "SESSION": lambda self: self._parse_alter_session(), 889 "UNSET": lambda self: self.expression( 890 exp.Set, 891 tag=self._match_text_seq("TAG"), 892 expressions=self._parse_csv(self._parse_id_var), 893 
unset=True, 894 ), 895 } 896 897 STATEMENT_PARSERS = { 898 **parser.Parser.STATEMENT_PARSERS, 899 TokenType.GET: lambda self: self._parse_get(), 900 TokenType.PUT: lambda self: self._parse_put(), 901 TokenType.SHOW: lambda self: self._parse_show(), 902 } 903 904 PROPERTY_PARSERS = { 905 **parser.Parser.PROPERTY_PARSERS, 906 "CREDENTIALS": lambda self: self._parse_credentials_property(), 907 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 908 "LOCATION": lambda self: self._parse_location_property(), 909 "TAG": lambda self: self._parse_tag(), 910 "USING": lambda self: self._match_text_seq("TEMPLATE") 911 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 912 } 913 914 TYPE_CONVERTERS = { 915 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 916 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 917 } 918 919 SHOW_PARSERS = { 920 "DATABASES": _show_parser("DATABASES"), 921 "TERSE DATABASES": _show_parser("DATABASES"), 922 "SCHEMAS": _show_parser("SCHEMAS"), 923 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 924 "OBJECTS": _show_parser("OBJECTS"), 925 "TERSE OBJECTS": _show_parser("OBJECTS"), 926 "TABLES": _show_parser("TABLES"), 927 "TERSE TABLES": _show_parser("TABLES"), 928 "VIEWS": _show_parser("VIEWS"), 929 "TERSE VIEWS": _show_parser("VIEWS"), 930 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 931 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 932 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 933 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 934 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 935 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 936 "SEQUENCES": _show_parser("SEQUENCES"), 937 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 938 "STAGES": _show_parser("STAGES"), 939 "COLUMNS": _show_parser("COLUMNS"), 940 "USERS": _show_parser("USERS"), 941 "TERSE USERS": _show_parser("USERS"), 942 "FILE FORMATS": _show_parser("FILE FORMATS"), 943 "FUNCTIONS": _show_parser("FUNCTIONS"), 944 "PROCEDURES": _show_parser("PROCEDURES"), 945 "WAREHOUSES": _show_parser("WAREHOUSES"), 946 } 947 948 CONSTRAINT_PARSERS = { 949 **parser.Parser.CONSTRAINT_PARSERS, 950 "WITH": lambda self: self._parse_with_constraint(), 951 "MASKING": lambda self: self._parse_with_constraint(), 952 "PROJECTION": lambda self: self._parse_with_constraint(), 953 "TAG": lambda self: self._parse_with_constraint(), 954 } 955 956 STAGED_FILE_SINGLE_TOKENS = { 957 TokenType.DOT, 958 TokenType.MOD, 959 TokenType.SLASH, 960 } 961 962 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 963 964 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 965 966 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 967 968 LAMBDAS = { 969 **parser.Parser.LAMBDAS, 970 TokenType.ARROW: lambda self, expressions: self.expression( 971 exp.Lambda, 972 this=self._replace_lambda( 973 self._parse_assignment(), 974 expressions, 975 ), 976 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 977 ), 978 } 979 980 COLUMN_OPERATORS = { 981 **parser.Parser.COLUMN_OPERATORS, 982 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 983 exp.ModelAttribute, this=this, expression=attr 984 ), 985 } 986 987 def _parse_directory(self) -> exp.DirectoryStage: 988 table = self._parse_table_parts() 989 990 if isinstance(table, exp.Table): 991 table = table.this 992 993 return self.expression(exp.DirectoryStage, this=table) 994 995 def _parse_use(self) -> 
exp.Use: 996 if self._match_text_seq("SECONDARY", "ROLES"): 997 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 998 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 999 return self.expression( 1000 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 1001 ) 1002 1003 return super()._parse_use() 1004 1005 def _negate_range( 1006 self, this: t.Optional[exp.Expression] = None 1007 ) -> t.Optional[exp.Expression]: 1008 if not this: 1009 return this 1010 1011 query = this.args.get("query") 1012 if isinstance(this, exp.In) and isinstance(query, exp.Query): 1013 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 1014 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 1015 # which can produce different results (most likely a SnowFlake bug). 1016 # 1017 # https://docs.snowflake.com/en/sql-reference/functions/in 1018 # Context: https://github.com/tobymao/sqlglot/issues/3890 1019 return self.expression( 1020 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 1021 ) 1022 1023 return self.expression(exp.Not, this=this) 1024 1025 def _parse_tag(self) -> exp.Tags: 1026 return self.expression( 1027 exp.Tags, 1028 expressions=self._parse_wrapped_csv(self._parse_property), 1029 ) 1030 1031 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 1032 if self._prev.token_type != TokenType.WITH: 1033 self._retreat(self._index - 1) 1034 1035 if self._match_text_seq("MASKING", "POLICY"): 1036 policy = self._parse_column() 1037 return self.expression( 1038 exp.MaskingPolicyColumnConstraint, 1039 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1040 expressions=self._match(TokenType.USING) 1041 and self._parse_wrapped_csv(self._parse_id_var), 1042 ) 1043 if self._match_text_seq("PROJECTION", "POLICY"): 1044 policy = self._parse_column() 1045 return self.expression( 1046 exp.ProjectionPolicyColumnConstraint, 1047 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 1048 ) 1049 if self._match(TokenType.TAG): 1050 return self._parse_tag() 1051 1052 return None 1053 1054 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1055 if self._match(TokenType.TAG): 1056 return self._parse_tag() 1057 1058 return super()._parse_with_property() 1059 1060 def _parse_create(self) -> exp.Create | exp.Command: 1061 expression = super()._parse_create() 1062 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 1063 # Replace the Table node with the enclosed Identifier 1064 expression.this.replace(expression.this.this) 1065 1066 return expression 1067 1068 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 1069 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 1070 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 1071 this = self._parse_var() or self._parse_type() 1072 1073 if not this: 1074 return None 1075 1076 self._match(TokenType.COMMA) 1077 expression = self._parse_bitwise() 1078 this = map_date_part(this) 1079 name = this.name.upper() 1080 1081 if name.startswith("EPOCH"): 1082 if name == "EPOCH_MILLISECOND": 1083 scale = 10**3 1084 elif name == "EPOCH_MICROSECOND": 1085 scale = 10**6 1086 elif name == "EPOCH_NANOSECOND": 1087 scale = 10**9 1088 else: 1089 scale = None 1090 1091 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 1092 to_unix: 
exp.Expression = self.expression(exp.TimeToUnix, this=ts) 1093 1094 if scale: 1095 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 1096 1097 return to_unix 1098 1099 return self.expression(exp.Extract, this=this, expression=expression) 1100 1101 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1102 if is_map: 1103 # Keys are strings in Snowflake's objects, see also: 1104 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1105 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1106 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1107 1108 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1109 1110 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1111 lateral = super()._parse_lateral() 1112 if not lateral: 1113 return lateral 1114 1115 if isinstance(lateral.this, exp.Explode): 1116 table_alias = lateral.args.get("alias") 1117 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1118 if table_alias and not table_alias.args.get("columns"): 1119 table_alias.set("columns", columns) 1120 elif not table_alias: 1121 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1122 1123 return lateral 1124 1125 def _parse_table_parts( 1126 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1127 ) -> exp.Table: 1128 # https://docs.snowflake.com/en/user-guide/querying-stage 1129 if self._match(TokenType.STRING, advance=False): 1130 table = self._parse_string() 1131 elif self._match_text_seq("@", advance=False): 1132 table = self._parse_location_path() 1133 else: 1134 table = None 1135 1136 if table: 1137 file_format = None 1138 pattern = None 1139 1140 wrapped = self._match(TokenType.L_PAREN) 1141 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1142 if self._match_text_seq("FILE_FORMAT", "=>"): 1143 file_format = self._parse_string() or super()._parse_table_parts( 1144 is_db_reference=is_db_reference 1145 ) 1146 elif self._match_text_seq("PATTERN", "=>"): 1147 pattern = self._parse_string() 1148 else: 1149 break 1150 1151 self._match(TokenType.COMMA) 1152 1153 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1154 else: 1155 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1156 1157 return table 1158 1159 def _parse_table( 1160 self, 1161 schema: bool = False, 1162 joins: bool = False, 1163 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1164 parse_bracket: bool = False, 1165 is_db_reference: bool = False, 1166 parse_partition: bool = False, 1167 consume_pipe: bool = False, 1168 ) -> t.Optional[exp.Expression]: 1169 table = super()._parse_table( 1170 schema=schema, 1171 joins=joins, 1172 alias_tokens=alias_tokens, 1173 parse_bracket=parse_bracket, 1174 is_db_reference=is_db_reference, 1175 parse_partition=parse_partition, 1176 ) 1177 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1178 table_from_rows = table.this 1179 for arg in exp.TableFromRows.arg_types: 1180 if arg != "this": 1181 table_from_rows.set(arg, table.args.get(arg)) 1182 1183 table = table_from_rows 1184 1185 return table 1186 1187 def _parse_id_var( 1188 self, 1189 any_token: bool = True, 1190 tokens: t.Optional[t.Collection[TokenType]] = None, 1191 ) -> t.Optional[exp.Expression]: 1192 if self._match_text_seq("IDENTIFIER", "("): 1193 identifier = ( 1194 
super()._parse_id_var(any_token=any_token, tokens=tokens) 1195 or self._parse_string() 1196 ) 1197 self._match_r_paren() 1198 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1199 1200 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1201 1202 def _parse_show_snowflake(self, this: str) -> exp.Show: 1203 scope = None 1204 scope_kind = None 1205 1206 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1207 # which is syntactically valid but has no effect on the output 1208 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1209 1210 history = self._match_text_seq("HISTORY") 1211 1212 like = self._parse_string() if self._match(TokenType.LIKE) else None 1213 1214 if self._match(TokenType.IN): 1215 if self._match_text_seq("ACCOUNT"): 1216 scope_kind = "ACCOUNT" 1217 elif self._match_text_seq("CLASS"): 1218 scope_kind = "CLASS" 1219 scope = self._parse_table_parts() 1220 elif self._match_text_seq("APPLICATION"): 1221 scope_kind = "APPLICATION" 1222 if self._match_text_seq("PACKAGE"): 1223 scope_kind += " PACKAGE" 1224 scope = self._parse_table_parts() 1225 elif self._match_set(self.DB_CREATABLES): 1226 scope_kind = self._prev.text.upper() 1227 if self._curr: 1228 scope = self._parse_table_parts() 1229 elif self._curr: 1230 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1231 scope = self._parse_table_parts() 1232 1233 return self.expression( 1234 exp.Show, 1235 **{ 1236 "terse": terse, 1237 "this": this, 1238 "history": history, 1239 "like": like, 1240 "scope": scope, 1241 "scope_kind": scope_kind, 1242 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1243 "limit": self._parse_limit(), 1244 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1245 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1246 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1247 }, 1248 ) 1249 1250 def _parse_put(self) -> exp.Put | exp.Command: 1251 if self._curr.token_type != TokenType.STRING: 1252 return self._parse_as_command(self._prev) 1253 1254 return self.expression( 1255 exp.Put, 1256 this=self._parse_string(), 1257 target=self._parse_location_path(), 1258 properties=self._parse_properties(), 1259 ) 1260 1261 def _parse_get(self) -> t.Optional[exp.Expression]: 1262 start = self._prev 1263 1264 # If we detect GET( then we need to parse a function, not a statement 1265 if self._match(TokenType.L_PAREN): 1266 self._retreat(self._index - 2) 1267 return self._parse_expression() 1268 1269 target = self._parse_location_path() 1270 1271 # Parse as command if unquoted file path 1272 if self._curr.token_type == TokenType.URI_START: 1273 return self._parse_as_command(start) 1274 1275 return self.expression( 1276 exp.Get, 1277 this=self._parse_string(), 1278 target=target, 1279 properties=self._parse_properties(), 1280 ) 1281 1282 def _parse_location_property(self) -> exp.LocationProperty: 1283 self._match(TokenType.EQ) 1284 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1285 1286 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1287 # Parse either a subquery or a staged file 1288 return ( 1289 self._parse_select(table=True, parse_subquery_alias=False) 1290 if self._match(TokenType.L_PAREN, advance=False) 1291 else self._parse_table_parts() 1292 ) 1293 1294 def _parse_location_path(self) -> exp.Var: 1295 start = self._curr 1296 self._advance_any(ignore_reserved=True) 1297 1298 # We avoid consuming a comma 
token because external tables like @foo and @bar 1299 # can be joined in a query with a comma separator, as well as closing paren 1300 # in case of subqueries 1301 while self._is_connected() and not self._match_set( 1302 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1303 ): 1304 self._advance_any(ignore_reserved=True) 1305 1306 return exp.var(self._find_sql(start, self._prev)) 1307 1308 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1309 this = super()._parse_lambda_arg() 1310 1311 if not this: 1312 return this 1313 1314 typ = self._parse_types() 1315 1316 if typ: 1317 return self.expression(exp.Cast, this=this, to=typ) 1318 1319 return this 1320 1321 def _parse_foreign_key(self) -> exp.ForeignKey: 1322 # inlineFK, the REFERENCES columns are implied 1323 if self._match(TokenType.REFERENCES, advance=False): 1324 return self.expression(exp.ForeignKey) 1325 1326 # outoflineFK, explicitly names the columns 1327 return super()._parse_foreign_key() 1328 1329 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1330 self._match(TokenType.EQ) 1331 if self._match(TokenType.L_PAREN, advance=False): 1332 expressions = self._parse_wrapped_options() 1333 else: 1334 expressions = [self._parse_format_name()] 1335 1336 return self.expression( 1337 exp.FileFormatProperty, 1338 expressions=expressions, 1339 ) 1340 1341 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1342 return self.expression( 1343 exp.CredentialsProperty, 1344 expressions=self._parse_wrapped_options(), 1345 ) 1346 1347 def _parse_semantic_view(self) -> exp.SemanticView: 1348 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1349 1350 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1351 if self._match_text_seq("DIMENSIONS"): 1352 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1353 if self._match_text_seq("METRICS"): 1354 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1355 if self._match_text_seq("WHERE"): 1356 kwargs["where"] = self._parse_expression() 1357 1358 return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
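A quick usage sketch of this parser, grounded in the FUNCTIONS mapping above (IFF -> exp.If):

import sqlglot
from sqlglot import exp

# IFF(...) is parsed into the dialect-agnostic exp.If node
ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake")
assert isinstance(ast.selects[0], exp.If)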
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1360 class Tokenizer(tokens.Tokenizer): 1361 STRING_ESCAPES = ["\\", "'"] 1362 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1363 RAW_STRINGS = ["$$"] 1364 COMMENTS = ["--", "//", ("/*", "*/")] 1365 NESTED_COMMENTS = False 1366 1367 KEYWORDS = { 1368 **tokens.Tokenizer.KEYWORDS, 1369 "BYTEINT": TokenType.INT, 1370 "FILE://": TokenType.URI_START, 1371 "FILE FORMAT": TokenType.FILE_FORMAT, 1372 "GET": TokenType.GET, 1373 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1374 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1375 "MINUS": TokenType.EXCEPT, 1376 "NCHAR VARYING": TokenType.VARCHAR, 1377 "PUT": TokenType.PUT, 1378 "REMOVE": TokenType.COMMAND, 1379 "RM": TokenType.COMMAND, 1380 "SAMPLE": TokenType.TABLE_SAMPLE, 1381 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1382 "SQL_DOUBLE": TokenType.DOUBLE, 1383 "SQL_VARCHAR": TokenType.VARCHAR, 1384 "STAGE": TokenType.STAGE, 1385 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1386 "STREAMLIT": TokenType.STREAMLIT, 1387 "TAG": TokenType.TAG, 1388 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1389 "TOP": TokenType.TOP, 1390 "WAREHOUSE": TokenType.WAREHOUSE, 1391 } 1392 KEYWORDS.pop("/*+") 1393 1394 SINGLE_TOKENS = { 1395 **tokens.Tokenizer.SINGLE_TOKENS, 1396 "$": TokenType.PARAMETER, 1397 "!": TokenType.EXCLAMATION, 1398 } 1399 1400 VAR_SINGLE_TOKENS = {"$"} 1401 1402 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
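A small sketch of the settings above ($$ raw strings, // line comments); sqlglot.tokenize is assumed to dispatch to this tokenizer for read="snowflake":

import sqlglot

# $$...$$ is a raw string and // opens a line comment in Snowflake
for token in sqlglot.tokenize("SELECT $$it's raw$$ // a comment", read="snowflake"):
    print(token.token_type, repr(token.text))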
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1404 class Generator(generator.Generator): 1405 PARAMETER_TOKEN = "$" 1406 MATCHED_BY_SOURCE = False 1407 SINGLE_STRING_INTERVAL = True 1408 JOIN_HINTS = False 1409 TABLE_HINTS = False 1410 QUERY_HINTS = False 1411 AGGREGATE_FILTER_SUPPORTED = False 1412 SUPPORTS_TABLE_COPY = False 1413 COLLATE_IS_FUNC = True 1414 LIMIT_ONLY_LITERALS = True 1415 JSON_KEY_VALUE_PAIR_SEP = "," 1416 INSERT_OVERWRITE = " OVERWRITE INTO" 1417 STRUCT_DELIMITER = ("(", ")") 1418 COPY_PARAMS_ARE_WRAPPED = False 1419 COPY_PARAMS_EQ_REQUIRED = True 1420 STAR_EXCEPT = "EXCLUDE" 1421 SUPPORTS_EXPLODING_PROJECTIONS = False 1422 ARRAY_CONCAT_IS_VAR_LEN = False 1423 SUPPORTS_CONVERT_TIMEZONE = True 1424 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1425 SUPPORTS_MEDIAN = True 1426 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1427 SUPPORTS_DECODE_CASE = True 1428 IS_BOOL_ALLOWED = False 1429 1430 TRANSFORMS = { 1431 **generator.Generator.TRANSFORMS, 1432 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1433 exp.ArgMax: rename_func("MAX_BY"), 1434 exp.ArgMin: rename_func("MIN_BY"), 1435 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1436 exp.ArrayContains: lambda self, e: self.func( 1437 "ARRAY_CONTAINS", 1438 e.expression 1439 if e.args.get("ensure_variant") is False 1440 else exp.cast(e.expression, exp.DataType.Type.VARIANT, copy=False), 1441 e.this, 1442 ), 1443 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1444 exp.AtTimeZone: lambda self, e: self.func( 1445 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1446 ), 1447 exp.BitwiseOr: rename_func("BITOR"), 1448 exp.BitwiseXor: rename_func("BITXOR"), 1449 exp.BitwiseAnd: rename_func("BITAND"), 1450 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1451 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1452 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1453 exp.BitwiseNot: rename_func("BITNOT"), 1454 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1455 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1456 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1457 exp.DateAdd: date_delta_sql("DATEADD"), 1458 exp.DateDiff: date_delta_sql("DATEDIFF"), 1459 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1460 exp.DatetimeDiff: timestampdiff_sql, 1461 exp.DateStrToDate: datestrtodate_sql, 1462 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1463 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1464 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1465 exp.DayOfYear: rename_func("DAYOFYEAR"), 1466 exp.Explode: rename_func("FLATTEN"), 1467 exp.Extract: lambda self, e: self.func( 1468 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1469 ), 1470 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1471 exp.FileFormatProperty: lambda self, 1472 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1473 exp.FromTimeZone: lambda self, e: self.func( 1474 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1475 ), 1476 exp.GenerateSeries: lambda self, e: self.func( 1477 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1478 ), 1479 exp.GetExtract: rename_func("GET"), 1480 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1481 exp.If: if_sql(name="IFF", false_value="NULL"), 1482 exp.JSONExtractArray: _json_extract_value_array_sql, 1483 exp.JSONExtractScalar: lambda self, e: self.func( 1484 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1485 ), 1486 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1487 exp.JSONPathRoot: lambda *_: 
"", 1488 exp.JSONValueArray: _json_extract_value_array_sql, 1489 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1490 rename_func("EDITDISTANCE") 1491 ), 1492 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1493 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1494 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1495 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1496 exp.MakeInterval: no_make_interval_sql, 1497 exp.Max: max_or_greatest, 1498 exp.Min: min_or_least, 1499 exp.ParseJSON: lambda self, e: self.func( 1500 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1501 ), 1502 exp.JSONFormat: rename_func("TO_JSON"), 1503 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1504 exp.PercentileCont: transforms.preprocess( 1505 [transforms.add_within_group_for_percentiles] 1506 ), 1507 exp.PercentileDisc: transforms.preprocess( 1508 [transforms.add_within_group_for_percentiles] 1509 ), 1510 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1511 exp.RegexpExtract: _regexpextract_sql, 1512 exp.RegexpExtractAll: _regexpextract_sql, 1513 exp.RegexpILike: _regexpilike_sql, 1514 exp.Rand: rename_func("RANDOM"), 1515 exp.Select: transforms.preprocess( 1516 [ 1517 transforms.eliminate_window_clause, 1518 transforms.eliminate_distinct_on, 1519 transforms.explode_projection_to_unnest(), 1520 transforms.eliminate_semi_and_anti_joins, 1521 _transform_generate_date_array, 1522 _qualify_unnested_columns, 1523 _eliminate_dot_variant_lookup, 1524 ] 1525 ), 1526 exp.SHA: rename_func("SHA1"), 1527 exp.MD5Digest: rename_func("MD5_BINARY"), 1528 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1529 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1530 exp.LowerHex: rename_func("TO_CHAR"), 1531 exp.SortArray: rename_func("ARRAY_SORT"), 1532 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1533 exp.StartsWith: rename_func("STARTSWITH"), 1534 exp.EndsWith: rename_func("ENDSWITH"), 1535 exp.StrPosition: lambda self, e: strposition_sql( 1536 self, e, func_name="CHARINDEX", supports_position=True 1537 ), 1538 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1539 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1540 exp.Stuff: rename_func("INSERT"), 1541 exp.StPoint: rename_func("ST_MAKEPOINT"), 1542 exp.TimeAdd: date_delta_sql("TIMEADD"), 1543 exp.Timestamp: no_timestamp_sql, 1544 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1545 exp.TimestampDiff: lambda self, e: self.func( 1546 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1547 ), 1548 exp.TimestampTrunc: timestamptrunc_sql(), 1549 exp.TimeStrToTime: timestrtotime_sql, 1550 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1551 exp.ToArray: rename_func("TO_ARRAY"), 1552 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1553 exp.ToDouble: rename_func("TO_DOUBLE"), 1554 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1555 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1556 exp.TsOrDsToDate: lambda self, e: self.func( 1557 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1558 ), 1559 exp.TsOrDsToTime: lambda self, e: self.func( 1560 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1561 ), 1562 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1563 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1564 exp.Uuid: rename_func("UUID_STRING"), 1565 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 
1566 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1567 exp.Xor: rename_func("BOOLXOR"), 1568 exp.ByteLength: rename_func("OCTET_LENGTH"), 1569 } 1570 1571 SUPPORTED_JSON_PATH_PARTS = { 1572 exp.JSONPathKey, 1573 exp.JSONPathRoot, 1574 exp.JSONPathSubscript, 1575 } 1576 1577 TYPE_MAPPING = { 1578 **generator.Generator.TYPE_MAPPING, 1579 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1580 exp.DataType.Type.NESTED: "OBJECT", 1581 exp.DataType.Type.STRUCT: "OBJECT", 1582 exp.DataType.Type.TEXT: "VARCHAR", 1583 } 1584 1585 TOKEN_MAPPING = { 1586 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1587 } 1588 1589 PROPERTIES_LOCATION = { 1590 **generator.Generator.PROPERTIES_LOCATION, 1591 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1592 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1593 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1594 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1595 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1596 } 1597 1598 UNSUPPORTED_VALUES_EXPRESSIONS = { 1599 exp.Map, 1600 exp.StarMap, 1601 exp.Struct, 1602 exp.VarMap, 1603 } 1604 1605 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1606 1607 def with_properties(self, properties: exp.Properties) -> str: 1608 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1609 1610 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1611 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1612 values_as_table = False 1613 1614 return super().values_sql(expression, values_as_table=values_as_table) 1615 1616 def datatype_sql(self, expression: exp.DataType) -> str: 1617 expressions = expression.expressions 1618 if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES): 1619 for field_type in expressions: 1620 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1621 if isinstance(field_type, exp.DataType): 1622 return "OBJECT" 1623 if ( 1624 isinstance(field_type, exp.ColumnDef) 1625 and field_type.this 1626 and field_type.this.is_string 1627 ): 1628 # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides 1629 # converting 'foo' into an identifier, we also need to quote it because these 1630 # keys are case-sensitive. 
For example: 1631 # 1632 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1633 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1634 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1635 1636 return super().datatype_sql(expression) 1637 1638 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1639 return self.func( 1640 "TO_NUMBER", 1641 expression.this, 1642 expression.args.get("format"), 1643 expression.args.get("precision"), 1644 expression.args.get("scale"), 1645 ) 1646 1647 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1648 milli = expression.args.get("milli") 1649 if milli is not None: 1650 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1651 expression.set("nano", milli_to_nano) 1652 1653 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1654 1655 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1656 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1657 return self.func("TO_GEOGRAPHY", expression.this) 1658 if expression.is_type(exp.DataType.Type.GEOMETRY): 1659 return self.func("TO_GEOMETRY", expression.this) 1660 1661 return super().cast_sql(expression, safe_prefix=safe_prefix) 1662 1663 def trycast_sql(self, expression: exp.TryCast) -> str: 1664 value = expression.this 1665 1666 if value.type is None: 1667 from sqlglot.optimizer.annotate_types import annotate_types 1668 1669 value = annotate_types(value, dialect=self.dialect) 1670 1671 # Snowflake requires that TRY_CAST's value be a string 1672 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1673 # if we can deduce that the value is a string, then we can generate TRY_CAST 1674 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1675 return super().trycast_sql(expression) 1676 1677 return self.cast_sql(expression) 1678 1679 def log_sql(self, expression: exp.Log) -> str: 1680 if not expression.expression: 1681 return self.func("LN", expression.this) 1682 1683 return super().log_sql(expression) 1684 1685 def unnest_sql(self, expression: exp.Unnest) -> str: 1686 unnest_alias = expression.args.get("alias") 1687 offset = expression.args.get("offset") 1688 1689 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1690 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1691 1692 columns = [ 1693 exp.to_identifier("seq"), 1694 exp.to_identifier("key"), 1695 exp.to_identifier("path"), 1696 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1697 value, 1698 exp.to_identifier("this"), 1699 ] 1700 1701 if unnest_alias: 1702 unnest_alias.set("columns", columns) 1703 else: 1704 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1705 1706 table_input = self.sql(expression.expressions[0]) 1707 if not table_input.startswith("INPUT =>"): 1708 table_input = f"INPUT => {table_input}" 1709 1710 expression_parent = expression.parent 1711 1712 explode = ( 1713 f"FLATTEN({table_input})" 1714 if isinstance(expression_parent, exp.Lateral) 1715 else f"TABLE(FLATTEN({table_input}))" 1716 ) 1717 alias = self.sql(unnest_alias) 1718 alias = f" AS {alias}" if alias else "" 1719 value = ( 1720 "" 1721 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1722 else f"{value} FROM " 1723 ) 1724 1725 return f"{value}{explode}{alias}" 1726 1727 def show_sql(self, expression: exp.Show) -> 
str: 1728 terse = "TERSE " if expression.args.get("terse") else "" 1729 history = " HISTORY" if expression.args.get("history") else "" 1730 like = self.sql(expression, "like") 1731 like = f" LIKE {like}" if like else "" 1732 1733 scope = self.sql(expression, "scope") 1734 scope = f" {scope}" if scope else "" 1735 1736 scope_kind = self.sql(expression, "scope_kind") 1737 if scope_kind: 1738 scope_kind = f" IN {scope_kind}" 1739 1740 starts_with = self.sql(expression, "starts_with") 1741 if starts_with: 1742 starts_with = f" STARTS WITH {starts_with}" 1743 1744 limit = self.sql(expression, "limit") 1745 1746 from_ = self.sql(expression, "from") 1747 if from_: 1748 from_ = f" FROM {from_}" 1749 1750 privileges = self.expressions(expression, key="privileges", flat=True) 1751 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1752 1753 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1754 1755 def describe_sql(self, expression: exp.Describe) -> str: 1756 # Default to table if kind is unknown 1757 kind_value = expression.args.get("kind") or "TABLE" 1758 kind = f" {kind_value}" if kind_value else "" 1759 this = f" {self.sql(expression, 'this')}" 1760 expressions = self.expressions(expression, flat=True) 1761 expressions = f" {expressions}" if expressions else "" 1762 return f"DESCRIBE{kind}{this}{expressions}" 1763 1764 def generatedasidentitycolumnconstraint_sql( 1765 self, expression: exp.GeneratedAsIdentityColumnConstraint 1766 ) -> str: 1767 start = expression.args.get("start") 1768 start = f" START {start}" if start else "" 1769 increment = expression.args.get("increment") 1770 increment = f" INCREMENT {increment}" if increment else "" 1771 1772 order = expression.args.get("order") 1773 if order is not None: 1774 order_clause = " ORDER" if order else " NOORDER" 1775 else: 1776 order_clause = "" 1777 1778 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1779 1780 def cluster_sql(self, expression: exp.Cluster) -> str: 1781 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1782 1783 def struct_sql(self, expression: exp.Struct) -> str: 1784 if len(expression.expressions) == 1: 1785 arg = expression.expressions[0] 1786 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1787 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1788 return f"{{{self.sql(expression.expressions[0])}}}" 1789 1790 keys = [] 1791 values = [] 1792 1793 for i, e in enumerate(expression.expressions): 1794 if isinstance(e, exp.PropertyEQ): 1795 keys.append( 1796 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1797 ) 1798 values.append(e.expression) 1799 else: 1800 keys.append(exp.Literal.string(f"_{i}")) 1801 values.append(e) 1802 1803 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1804 1805 @unsupported_args("weight", "accuracy") 1806 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1807 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1808 1809 def alterset_sql(self, expression: exp.AlterSet) -> str: 1810 exprs = self.expressions(expression, flat=True) 1811 exprs = f" {exprs}" if exprs else "" 1812 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1813 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1814 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1815 
copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1816 tag = self.expressions(expression, key="tag", flat=True) 1817 tag = f" TAG {tag}" if tag else "" 1818 1819 return f"SET{exprs}{file_format}{copy_options}{tag}" 1820 1821 def strtotime_sql(self, expression: exp.StrToTime): 1822 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1823 return self.func( 1824 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1825 ) 1826 1827 def timestampsub_sql(self, expression: exp.TimestampSub): 1828 return self.sql( 1829 exp.TimestampAdd( 1830 this=expression.this, 1831 expression=expression.expression * -1, 1832 unit=expression.unit, 1833 ) 1834 ) 1835 1836 def jsonextract_sql(self, expression: exp.JSONExtract): 1837 this = expression.this 1838 1839 # JSON strings are valid coming from other dialects such as BQ so 1840 # for these cases we PARSE_JSON preemptively 1841 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1842 "requires_json" 1843 ): 1844 this = exp.ParseJSON(this=this) 1845 1846 return self.func( 1847 "GET_PATH", 1848 this, 1849 expression.expression, 1850 ) 1851 1852 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1853 this = expression.this 1854 if this.is_string: 1855 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1856 1857 return self.func("TO_CHAR", this, self.format_time(expression)) 1858 1859 def datesub_sql(self, expression: exp.DateSub) -> str: 1860 value = expression.expression 1861 if value: 1862 value.replace(value * (-1)) 1863 else: 1864 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1865 1866 return date_delta_sql("DATEADD")(self, expression) 1867 1868 def select_sql(self, expression: exp.Select) -> str: 1869 limit = expression.args.get("limit") 1870 offset = expression.args.get("offset") 1871 if offset and not limit: 1872 expression.limit(exp.Null(), copy=False) 1873 return super().select_sql(expression) 1874 1875 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1876 is_materialized = expression.find(exp.MaterializedProperty) 1877 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1878 1879 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1880 # For materialized views, COPY GRANTS is located *before* the columns list 1881 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1882 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1883 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1884 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1885 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1886 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1887 1888 this_name = self.sql(expression.this, "this") 1889 copy_grants = self.sql(copy_grants_property) 1890 this_schema = self.schema_columns_sql(expression.this) 1891 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1892 1893 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1894 1895 return super().createable_sql(expression, locations) 1896 1897 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1898 this = expression.this 1899 1900 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1901 # and add it later as part of the WITHIN GROUP 
clause 1902 order = this if isinstance(this, exp.Order) else None 1903 if order: 1904 expression.set("this", order.this.pop()) 1905 1906 expr_sql = super().arrayagg_sql(expression) 1907 1908 if order: 1909 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1910 1911 return expr_sql 1912 1913 def array_sql(self, expression: exp.Array) -> str: 1914 expressions = expression.expressions 1915 1916 first_expr = seq_get(expressions, 0) 1917 if isinstance(first_expr, exp.Select): 1918 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1919 if first_expr.text("kind").upper() == "STRUCT": 1920 object_construct_args = [] 1921 for expr in first_expr.expressions: 1922 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1923 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1924 name = expr.this if isinstance(expr, exp.Alias) else expr 1925 1926 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1927 1928 array_agg = exp.ArrayAgg( 1929 this=_build_object_construct(args=object_construct_args) 1930 ) 1931 1932 first_expr.set("kind", None) 1933 first_expr.set("expressions", [array_agg]) 1934 1935 return self.sql(first_expr.subquery()) 1936 1937 return inline_array_sql(self, expression) 1938 1939 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1940 zone = self.sql(expression, "this") 1941 if not zone: 1942 return super().currentdate_sql(expression) 1943 1944 expr = exp.Cast( 1945 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1946 to=exp.DataType(this=exp.DataType.Type.DATE), 1947 ) 1948 return self.sql(expr) 1949 1950 def dot_sql(self, expression: exp.Dot) -> str: 1951 this = expression.this 1952 1953 if not this.type: 1954 from sqlglot.optimizer.annotate_types import annotate_types 1955 1956 this = annotate_types(this, dialect=self.dialect) 1957 1958 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1959 # Generate colon notation for the top level STRUCT 1960 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1961 1962 return super().dot_sql(expression) 1963 1964 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1965 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1966 1967 def format_sql(self, expression: exp.Format) -> str: 1968 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1969 return self.func("TO_CHAR", expression.expressions[0]) 1970 1971 return self.function_fallback_sql(expression) 1972 1973 def splitpart_sql(self, expression: exp.SplitPart) -> str: 1974 # Set part_index to 1 if missing 1975 if not expression.args.get("delimiter"): 1976 expression.set("delimiter", exp.Literal.string(" ")) 1977 1978 if not expression.args.get("part_index"): 1979 expression.set("part_index", exp.Literal.number(1)) 1980 1981 return rename_func("SPLIT_PART")(self, expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
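These options are ordinary keyword arguments, and sqlglot.transpile forwards them to the generator. A minimal sketch (identifiers are illustrative; the exact output may differ slightly across sqlglot versions):

import sqlglot

# pretty/identify are the generator options documented above
sql = "SELECT a, b FROM t WHERE a > 1"
print(sqlglot.transpile(sql, write="snowflake", pretty=True, identify=True)[0])
# Roughly:
# SELECT
#   "a",
#   "b"
# FROM "t"
# WHERE
#   "a" > 1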
    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
            for field_type in expressions:
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                if isinstance(field_type, exp.DataType):
                    return "OBJECT"
                if (
                    isinstance(field_type, exp.ColumnDef)
                    and field_type.this
                    and field_type.this.is_string
                ):
                    # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                    # converting 'foo' into an identifier, we also need to quote it because these
                    # keys are case-sensitive. For example:
                    #
                    # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                    # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                    field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

        return super().datatype_sql(expression)
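A hedged illustration of the STRUCT-to-OBJECT mapping (identifiers are illustrative; the exact output may vary by sqlglot version):

import sqlglot

# A DuckDB STRUCT type surfaces as Snowflake's OBJECT; string keys would
# additionally be quoted by the branch above.
print(sqlglot.transpile("SELECT CAST(x AS STRUCT(a INT))", read="duckdb", write="snowflake")[0])
# Roughly: SELECT CAST(x AS OBJECT(a INT))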
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
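Assuming T-SQL's DATETIMEFROMPARTS as a source of the milli argument, a sketch of the millisecond-to-nanosecond rewrite (output approximate):

import sqlglot

# The milliseconds argument lands in the "milli" slot and is rewritten into
# Snowflake's nanoseconds slot as ms * 1000000.
sql = "SELECT DATETIMEFROMPARTS(2024, 1, 15, 8, 30, 0, 500)"
print(sqlglot.transpile(sql, read="tsql", write="snowflake")[0])
# Roughly: SELECT TIMESTAMP_FROM_PARTS(2024, 1, 15, 8, 30, 0, 500 * 1000000)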
    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)
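For example, spatial casts surface as conversion functions rather than CAST syntax (col is an illustrative column):

import sqlglot

print(sqlglot.transpile("SELECT CAST(col AS GEOGRAPHY)", write="snowflake")[0])
# Roughly: SELECT TO_GEOGRAPHY(col)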
    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value, dialect=self.dialect)

        # Snowflake requires that TRY_CAST's value be a string
        # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string")
        # or if we can deduce that the value is a string, then we can generate TRY_CAST
        if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
            return super().trycast_sql(expression)

        return self.cast_sql(expression)
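A sketch of the string-only rule (col is an illustrative untyped column; output approximate):

import sqlglot

# TRY_CAST is kept only for provably string operands; anything else degrades
# to a plain CAST.
sql = "SELECT TRY_CAST('10' AS INT), TRY_CAST(col AS INT)"
print(sqlglot.transpile(sql, read="duckdb", write="snowflake")[0])
# Roughly: SELECT TRY_CAST('10' AS INT), CAST(col AS INT)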
    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        unnest_alias_columns = unnest_alias.columns if unnest_alias else []
        value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            value,
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        expression_parent = expression.parent

        explode = (
            f"FLATTEN({table_input})"
            if isinstance(expression_parent, exp.Lateral)
            else f"TABLE(FLATTEN({table_input}))"
        )
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        value = (
            ""
            if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
            else f"{value} FROM "
        )

        return f"{value}{explode}{alias}"
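A hedged example of the UNNEST-to-FLATTEN rewrite (arr is an illustrative column; output approximate):

import sqlglot

# UNNEST becomes a TABLE(FLATTEN(...)) call with the canned alias columns built above.
print(sqlglot.transpile("SELECT * FROM UNNEST(arr)", read="bigquery", write="snowflake")[0])
# Roughly:
# SELECT * FROM TABLE(FLATTEN(INPUT => arr)) AS _u(seq, key, path, index, value, this)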
    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
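A round-trip sketch; the optional clauses are reassembled in the order of the final f-string above (parse support for each clause may vary by sqlglot version):

import sqlglot

sql = "SHOW TERSE TABLES LIKE 'foo%' IN SCHEMA db.s LIMIT 10"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
# Should round-trip roughly unchanged.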
    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"
    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""

        order = expression.args.get("order")
        if order is not None:
            order_clause = " ORDER" if order else " NOORDER"
        else:
            order_clause = ""

        return f"AUTOINCREMENT{start}{increment}{order_clause}"
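Assuming a PostgreSQL identity column as input, a sketch of the AUTOINCREMENT mapping (output approximate):

import sqlglot

sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 10))"
print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
# Roughly: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 10)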
    def struct_sql(self, expression: exp.Struct) -> str:
        if len(expression.expressions) == 1:
            arg = expression.expressions[0]
            if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
                # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
                return f"{{{self.sql(expression.expressions[0])}}}"

        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
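A sketch of the key/value pairing, including the synthetic positional key for unnamed fields (output approximate):

import sqlglot

print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2)", read="bigquery", write="snowflake")[0])
# Roughly: SELECT OBJECT_CONSTRUCT('a', 1, '_1', 2)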
    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"
    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ so
        # for these cases we PARSE_JSON preemptively
        if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
            "requires_json"
        ):
            this = exp.ParseJSON(this=this)

        return self.func(
            "GET_PATH",
            this,
            expression.expression,
        )
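A hedged example of the preemptive PARSE_JSON wrap (col is an illustrative column; output approximate):

import sqlglot

print(sqlglot.transpile("SELECT JSON_EXTRACT(col, '$.a.b')", read="bigquery", write="snowflake")[0])
# Roughly: SELECT GET_PATH(PARSE_JSON(col), 'a.b')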
    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)
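For instance, a BigQuery DATE_SUB should come out as a negated DATEADD (d is an illustrative column):

import sqlglot

print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 3 DAY)", read="bigquery", write="snowflake")[0])
# Roughly: SELECT DATEADD(DAY, -3, d)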
    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list
            # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
            # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it
            # POST_NAME if a materialized view is detected
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)
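A round-trip sketch of the materialized-view special case (no exact output claimed; parse support may vary by sqlglot version):

import sqlglot

# COPY GRANTS should stay before the column list for materialized views.
sql = "CREATE MATERIALIZED VIEW v COPY GRANTS (c1) AS SELECT 1 AS c1"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])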
    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql
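A sketch of the ORDER BY hoisting (illustrative identifiers; output approximate):

import sqlglot

print(sqlglot.transpile("SELECT ARRAY_AGG(x ORDER BY y) FROM t", read="postgres", write="snowflake")[0])
# Roughly: SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) FROM t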
    def array_sql(self, expression: exp.Array) -> str:
        expressions = expression.expressions

        first_expr = seq_get(expressions, 0)
        if isinstance(first_expr, exp.Select):
            # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
            if first_expr.text("kind").upper() == "STRUCT":
                object_construct_args = []
                for expr in first_expr.expressions:
                    # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                    # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                    name = expr.this if isinstance(expr, exp.Alias) else expr

                    object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                array_agg = exp.ArrayAgg(
                    this=_build_object_construct(args=object_construct_args)
                )

                first_expr.set("kind", None)
                first_expr.set("expressions", [array_agg])

                return self.sql(first_expr.subquery())

        return inline_array_sql(self, expression)
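A hedged example of the SELECT AS STRUCT rewrite (illustrative identifiers; output approximate):

import sqlglot

# BigQuery's ARRAY(SELECT AS STRUCT ...) pattern becomes an ARRAY_AGG over
# OBJECT_CONSTRUCT, wrapped in a subquery.
sql = "SELECT ARRAY(SELECT AS STRUCT a, b AS bb FROM t)"
print(sqlglot.transpile(sql, read="bigquery", write="snowflake")[0])
# Roughly: SELECT (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('a', a, 'bb', b)) FROM t)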
    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        zone = self.sql(expression, "this")
        if not zone:
            return super().currentdate_sql(expression)

        expr = exp.Cast(
            this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
            to=exp.DataType(this=exp.DataType.Type.DATE),
        )
        return self.sql(expr)
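A sketch of the timezone-aware rewrite (output approximate):

import sqlglot

# CURRENT_DATE with a timezone argument (BigQuery style) is rewritten via CONVERT_TIMEZONE.
print(sqlglot.transpile("SELECT CURRENT_DATE('America/New_York')", read="bigquery", write="snowflake")[0])
# Roughly: SELECT CAST(CONVERT_TIMEZONE('America/New_York', CURRENT_TIMESTAMP()) AS DATE)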
    def dot_sql(self, expression: exp.Dot) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
            # Generate colon notation for the top level STRUCT
            return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

        return super().dot_sql(expression)
    def splitpart_sql(self, expression: exp.SplitPart) -> str:
        # Default the delimiter to a single space and part_index to 1 if missing
        if not expression.args.get("delimiter"):
            expression.set("delimiter", exp.Literal.string(" "))

        if not expression.args.get("part_index"):
            expression.set("part_index", exp.Literal.number(1))

        return rename_func("SPLIT_PART")(self, expression)
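A round-trip sketch showing the defaults being materialized (STRTOK parsing is handled by _build_strtok in this module):

import sqlglot

# SPLIT_PART always receives a delimiter and a part index (defaults: single space and 1).
print(sqlglot.transpile("SELECT STRTOK('a b c')", read="snowflake", write="snowflake")[0])
# Roughly: SELECT SPLIT_PART('a b c', ' ', 1)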
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- directorystage_sql