sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc

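# Illustrative note (not part of the original source): because DIV0 is parsed into an
# exp.If by _build_if_from_div0 above, it regenerates in Snowflake itself as an IFF
# guard. Roughly (exact text may vary across sqlglot versions):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")  # doctest: +SKIP
#     ["SELECT IFF(b = 0 AND NOT a IS NULL, 0, a / b)"]
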
def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )

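# Illustrative note (not part of the original source): _unnest_generate_date_array above
# rewrites UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL 1 <unit>)) into an
# ARRAY_GENERATE_RANGE(0, DATEDIFF(<unit>, start, end) + 1) sequence whose values are
# projected back through DATEADD(<unit>, value, start), since Snowflake has no native
# date-array generator.
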
def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll be
            # unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)

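# Illustrative note (not part of the original source): in _regexpextract_sql above, the
# or-chain materializes defaults only as far left as needed. If only `group` is set,
# parameters ('c'), occurrence (1) and position (1) are all emitted so that the group
# argument lands in the right positional slot:
#
#     REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', <group>)
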
def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression

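# Illustrative note (not part of the original source): given a BigQuery-style
# SELECT a FROM UNNEST([STRUCT(1 AS a)]), _qualify_unnested_columns above infers the
# struct key `a`, aliases the unnest when it has no alias, and sets the table of the
# bare projection so it resolves against the unnest source.
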
def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and (unnest_alias.args.get("column_only") or not unnest_alias.this)
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }

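    # Illustrative note (not part of the original source): TIME_MAPPING above rewrites
    # Snowflake format tokens into strftime-style ones during transpilation, e.g. a format
    # literal 'YYYY-MM-DD HH24:MI:SS' becomes '%Y-%m-%d %H:%M:%S' when generating for a
    # dialect that uses strftime formats.
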
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

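        # Illustrative note (not part of the original source): several entries in FUNCTIONS
        # below reorder arguments to match sqlglot's canonical AST. For example, Snowflake's
        # ARRAY_CONTAINS(value, array) is stored as ArrayContains(this=array, expression=value),
        # which is why that builder swaps seq_get(args, 1) and seq_get(args, 0).
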
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

696 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 697 "OBJECTS": _show_parser("OBJECTS"), 698 "TERSE OBJECTS": _show_parser("OBJECTS"), 699 "TABLES": _show_parser("TABLES"), 700 "TERSE TABLES": _show_parser("TABLES"), 701 "VIEWS": _show_parser("VIEWS"), 702 "TERSE VIEWS": _show_parser("VIEWS"), 703 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 704 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 706 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 708 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "SEQUENCES": _show_parser("SEQUENCES"), 710 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 711 "STAGES": _show_parser("STAGES"), 712 "COLUMNS": _show_parser("COLUMNS"), 713 "USERS": _show_parser("USERS"), 714 "TERSE USERS": _show_parser("USERS"), 715 "FILE FORMATS": _show_parser("FILE FORMATS"), 716 "FUNCTIONS": _show_parser("FUNCTIONS"), 717 "PROCEDURES": _show_parser("PROCEDURES"), 718 "WAREHOUSES": _show_parser("WAREHOUSES"), 719 } 720 721 CONSTRAINT_PARSERS = { 722 **parser.Parser.CONSTRAINT_PARSERS, 723 "WITH": lambda self: self._parse_with_constraint(), 724 "MASKING": lambda self: self._parse_with_constraint(), 725 "PROJECTION": lambda self: self._parse_with_constraint(), 726 "TAG": lambda self: self._parse_with_constraint(), 727 } 728 729 STAGED_FILE_SINGLE_TOKENS = { 730 TokenType.DOT, 731 TokenType.MOD, 732 TokenType.SLASH, 733 } 734 735 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 736 737 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 738 739 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 740 741 LAMBDAS = { 742 **parser.Parser.LAMBDAS, 743 TokenType.ARROW: lambda self, expressions: self.expression( 744 exp.Lambda, 745 this=self._replace_lambda( 746 self._parse_assignment(), 747 expressions, 748 ), 749 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 750 ), 751 } 752 753 def _parse_use(self) -> exp.Use: 754 if self._match_text_seq("SECONDARY", "ROLES"): 755 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 756 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 757 return self.expression( 758 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 759 ) 760 761 return super()._parse_use() 762 763 def _negate_range( 764 self, this: t.Optional[exp.Expression] = None 765 ) -> t.Optional[exp.Expression]: 766 if not this: 767 return this 768 769 query = this.args.get("query") 770 if isinstance(this, exp.In) and isinstance(query, exp.Query): 771 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 772 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 773 # which can produce different results (most likely a SnowFlake bug). 
        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`,
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

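        # Illustrative note (not part of the original source): the EPOCH branch in
        # _parse_date_part above means DATE_PART(EPOCH_MILLISECOND, ts) parses as roughly
        # TimeToUnix(CAST(ts AS TIMESTAMP)) * 1000, i.e. the seconds-since-epoch value
        # scaled to the requested precision.
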
        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # This will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # For an inline FK, the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # For an out-of-line FK, the columns are named explicitly
            return super()._parse_foreign_key()

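        # Illustrative note (not part of the original source): Snowflake's FILE_FORMAT
        # property accepts either a parenthesized option list or a named format, e.g.
        #
        #     FILE_FORMAT = (TYPE = CSV FIELD_DELIMITER = '|')
        #     FILE_FORMAT = my_named_format
        #
        # which is why the parser below branches on whether an opening paren follows "=".
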
        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "EXCLUDE": TokenType.EXCEPT,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STAGE": TokenType.STAGE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

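    # Illustrative note (not part of the original source): the tokenizer above treats `$$`
    # as a raw-string delimiter and maps MINUS to EXCEPT, so e.g. `SELECT $$a'b$$` lexes the
    # literal a'b without escaping and `a MINUS b` parses as an EXCEPT set operation.
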
self.func("ARRAY_CONTAINS", e.expression, e.this), 1195 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1196 exp.AtTimeZone: lambda self, e: self.func( 1197 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1198 ), 1199 exp.BitwiseOr: rename_func("BITOR"), 1200 exp.BitwiseXor: rename_func("BITXOR"), 1201 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1202 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1203 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1204 exp.DateAdd: date_delta_sql("DATEADD"), 1205 exp.DateDiff: date_delta_sql("DATEDIFF"), 1206 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1207 exp.DatetimeDiff: timestampdiff_sql, 1208 exp.DateStrToDate: datestrtodate_sql, 1209 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1210 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1211 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1212 exp.DayOfYear: rename_func("DAYOFYEAR"), 1213 exp.Explode: rename_func("FLATTEN"), 1214 exp.Extract: lambda self, e: self.func( 1215 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1216 ), 1217 exp.FileFormatProperty: lambda self, 1218 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1219 exp.FromTimeZone: lambda self, e: self.func( 1220 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1221 ), 1222 exp.GenerateSeries: lambda self, e: self.func( 1223 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1224 ), 1225 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1226 exp.If: if_sql(name="IFF", false_value="NULL"), 1227 exp.JSONExtractArray: _json_extract_value_array_sql, 1228 exp.JSONExtractScalar: lambda self, e: self.func( 1229 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1230 ), 1231 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1232 exp.JSONPathRoot: lambda *_: "", 1233 exp.JSONValueArray: _json_extract_value_array_sql, 1234 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1235 rename_func("EDITDISTANCE") 1236 ), 1237 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1238 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1239 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1240 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1241 exp.MakeInterval: no_make_interval_sql, 1242 exp.Max: max_or_greatest, 1243 exp.Min: min_or_least, 1244 exp.ParseJSON: lambda self, e: self.func( 1245 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1246 ), 1247 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1248 exp.PercentileCont: transforms.preprocess( 1249 [transforms.add_within_group_for_percentiles] 1250 ), 1251 exp.PercentileDisc: transforms.preprocess( 1252 [transforms.add_within_group_for_percentiles] 1253 ), 1254 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1255 exp.RegexpExtract: _regexpextract_sql, 1256 exp.RegexpExtractAll: _regexpextract_sql, 1257 exp.RegexpILike: _regexpilike_sql, 1258 exp.Rand: rename_func("RANDOM"), 1259 exp.Select: transforms.preprocess( 1260 [ 1261 transforms.eliminate_window_clause, 1262 transforms.eliminate_distinct_on, 1263 transforms.explode_projection_to_unnest(), 1264 transforms.eliminate_semi_and_anti_joins, 1265 _transform_generate_date_array, 1266 _qualify_unnested_columns, 1267 _eliminate_dot_variant_lookup, 1268 ] 1269 ), 1270 exp.SHA: rename_func("SHA1"), 1271 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1272 exp.StartsWith: 
rename_func("STARTSWITH"), 1273 exp.EndsWith: rename_func("ENDSWITH"), 1274 exp.StrPosition: lambda self, e: strposition_sql( 1275 self, e, func_name="CHARINDEX", supports_position=True 1276 ), 1277 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1278 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1279 exp.Stuff: rename_func("INSERT"), 1280 exp.StPoint: rename_func("ST_MAKEPOINT"), 1281 exp.TimeAdd: date_delta_sql("TIMEADD"), 1282 exp.Timestamp: no_timestamp_sql, 1283 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1284 exp.TimestampDiff: lambda self, e: self.func( 1285 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1286 ), 1287 exp.TimestampTrunc: timestamptrunc_sql(), 1288 exp.TimeStrToTime: timestrtotime_sql, 1289 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1290 exp.ToArray: rename_func("TO_ARRAY"), 1291 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1292 exp.ToDouble: rename_func("TO_DOUBLE"), 1293 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1294 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1295 exp.TsOrDsToDate: lambda self, e: self.func( 1296 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1297 ), 1298 exp.TsOrDsToTime: lambda self, e: self.func( 1299 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1300 ), 1301 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1302 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1303 exp.Uuid: rename_func("UUID_STRING"), 1304 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1305 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1306 exp.Xor: rename_func("BOOLXOR"), 1307 } 1308 1309 SUPPORTED_JSON_PATH_PARTS = { 1310 exp.JSONPathKey, 1311 exp.JSONPathRoot, 1312 exp.JSONPathSubscript, 1313 } 1314 1315 TYPE_MAPPING = { 1316 **generator.Generator.TYPE_MAPPING, 1317 exp.DataType.Type.NESTED: "OBJECT", 1318 exp.DataType.Type.STRUCT: "OBJECT", 1319 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1320 } 1321 1322 TOKEN_MAPPING = { 1323 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1324 } 1325 1326 PROPERTIES_LOCATION = { 1327 **generator.Generator.PROPERTIES_LOCATION, 1328 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1329 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1330 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1331 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1332 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1333 } 1334 1335 UNSUPPORTED_VALUES_EXPRESSIONS = { 1336 exp.Map, 1337 exp.StarMap, 1338 exp.Struct, 1339 exp.VarMap, 1340 } 1341 1342 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1343 1344 def with_properties(self, properties: exp.Properties) -> str: 1345 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1346 1347 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1348 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1349 values_as_table = False 1350 1351 return super().values_sql(expression, values_as_table=values_as_table) 1352 1353 def datatype_sql(self, expression: exp.DataType) -> str: 1354 expressions = expression.expressions 1355 if ( 1356 expressions 1357 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1358 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1359 ): 1360 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1361 return "OBJECT" 1362 1363 return 
        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

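        # Illustrative sketch (not part of the original source): unnest_sql above renders a
        # BigQuery-style UNNEST in a FROM/JOIN position via TABLE(FLATTEN(...)), roughly:
        #
        #     SELECT value FROM t, UNNEST(t.arr)
        #     -> ... TABLE(FLATTEN(INPUT => t.arr)) AS _u(seq, key, path, index, value, this)
        #
        # The exact alias and column list depend on any user-provided UNNEST alias.
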
starts_with = f" STARTS WITH {starts_with}" 1468 1469 limit = self.sql(expression, "limit") 1470 1471 from_ = self.sql(expression, "from") 1472 if from_: 1473 from_ = f" FROM {from_}" 1474 1475 privileges = self.expressions(expression, key="privileges", flat=True) 1476 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1477 1478 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1479 1480 def describe_sql(self, expression: exp.Describe) -> str: 1481 # Default to table if kind is unknown 1482 kind_value = expression.args.get("kind") or "TABLE" 1483 kind = f" {kind_value}" if kind_value else "" 1484 this = f" {self.sql(expression, 'this')}" 1485 expressions = self.expressions(expression, flat=True) 1486 expressions = f" {expressions}" if expressions else "" 1487 return f"DESCRIBE{kind}{this}{expressions}" 1488 1489 def generatedasidentitycolumnconstraint_sql( 1490 self, expression: exp.GeneratedAsIdentityColumnConstraint 1491 ) -> str: 1492 start = expression.args.get("start") 1493 start = f" START {start}" if start else "" 1494 increment = expression.args.get("increment") 1495 increment = f" INCREMENT {increment}" if increment else "" 1496 1497 order = expression.args.get("order") 1498 if order is not None: 1499 order_clause = " ORDER" if order else " NOORDER" 1500 else: 1501 order_clause = "" 1502 1503 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1504 1505 def cluster_sql(self, expression: exp.Cluster) -> str: 1506 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1507 1508 def struct_sql(self, expression: exp.Struct) -> str: 1509 keys = [] 1510 values = [] 1511 1512 for i, e in enumerate(expression.expressions): 1513 if isinstance(e, exp.PropertyEQ): 1514 keys.append( 1515 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1516 ) 1517 values.append(e.expression) 1518 else: 1519 keys.append(exp.Literal.string(f"_{i}")) 1520 values.append(e) 1521 1522 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1523 1524 @unsupported_args("weight", "accuracy") 1525 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1526 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1527 1528 def alterset_sql(self, expression: exp.AlterSet) -> str: 1529 exprs = self.expressions(expression, flat=True) 1530 exprs = f" {exprs}" if exprs else "" 1531 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1532 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1533 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1534 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1535 tag = self.expressions(expression, key="tag", flat=True) 1536 tag = f" TAG {tag}" if tag else "" 1537 1538 return f"SET{exprs}{file_format}{copy_options}{tag}" 1539 1540 def strtotime_sql(self, expression: exp.StrToTime): 1541 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1542 return self.func( 1543 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1544 ) 1545 1546 def timestampsub_sql(self, expression: exp.TimestampSub): 1547 return self.sql( 1548 exp.TimestampAdd( 1549 this=expression.this, 1550 expression=expression.expression * -1, 1551 unit=expression.unit, 1552 ) 1553 ) 1554 1555 def jsonextract_sql(self, expression: exp.JSONExtract): 1556 this = expression.this 1557 1558 # JSON strings are 
        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if this.is_string:
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            is_materialized = expression.find(exp.MaterializedProperty)
            copy_grants_property = expression.find(exp.CopyGrantsProperty)

            if expression.kind == "VIEW" and is_materialized and copy_grants_property:
                # For materialized views, COPY GRANTS is located *before* the columns list.
                # This is in contrast to normal views, where COPY GRANTS is located *after*
                # the columns list. We default CopyGrantsProperty to POST_SCHEMA, which means
                # we need to output it POST_NAME if a materialized view is detected.
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
                post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
                post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

                this_name = self.sql(expression.this, "this")
                copy_grants = self.sql(copy_grants_property)
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

            return super().createable_sql(expression, locations)

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            this = expression.this

            # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
            # and add it later as part of the WITHIN GROUP clause
            order = this if isinstance(this, exp.Order) else None
            if order:
                expression.set("this", order.this.pop())

            expr_sql = super().arrayagg_sql(expression)

            if order:
                expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

            return expr_sql

        def array_sql(self, expression: exp.Array) -> str:
            expressions = expression.expressions

            first_expr = seq_get(expressions, 0)
            if isinstance(first_expr, exp.Select):
                # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
                if first_expr.text("kind").upper() == "STRUCT":
                    object_construct_args = []
                    for expr in first_expr.expressions:
                        # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                        # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                        name = expr.this if isinstance(expr, exp.Alias) else expr

                        object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                    array_agg = exp.ArrayAgg(
                        this=_build_object_construct(args=object_construct_args)
                    )

                    first_expr.set("kind", None)
                    first_expr.set("expressions", [array_agg])

                    return self.sql(first_expr.subquery())

            return inline_array_sql(self, expression)

        def currentdate_sql(self, expression: exp.CurrentDate) -> str:
            zone = self.sql(expression, "this")
            if not zone:
                return super().currentdate_sql(expression)

            expr = exp.Cast(
                this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
                to=exp.DataType(this=exp.DataType.Type.DATE),
            )
            return self.sql(expr)
486class Snowflake(Dialect): 487 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 488 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 489 NULL_ORDERING = "nulls_are_large" 490 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 491 SUPPORTS_USER_DEFINED_TYPES = False 492 SUPPORTS_SEMI_ANTI_JOIN = False 493 PREFER_CTE_ALIAS_COLUMN = True 494 TABLESAMPLE_SIZE_IS_PERCENT = True 495 COPY_PARAMS_ARE_CSV = False 496 ARRAY_AGG_INCLUDES_NULLS = None 497 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 498 499 TIME_MAPPING = { 500 "YYYY": "%Y", 501 "yyyy": "%Y", 502 "YY": "%y", 503 "yy": "%y", 504 "MMMM": "%B", 505 "mmmm": "%B", 506 "MON": "%b", 507 "mon": "%b", 508 "MM": "%m", 509 "mm": "%m", 510 "DD": "%d", 511 "dd": "%-d", 512 "DY": "%a", 513 "dy": "%w", 514 "HH24": "%H", 515 "hh24": "%H", 516 "HH12": "%I", 517 "hh12": "%I", 518 "MI": "%M", 519 "mi": "%M", 520 "SS": "%S", 521 "ss": "%S", 522 "FF6": "%f", 523 "ff6": "%f", 524 } 525 526 DATE_PART_MAPPING = { 527 **Dialect.DATE_PART_MAPPING, 528 "ISOWEEK": "WEEKISO", 529 } 530 531 def quote_identifier(self, expression: E, identify: bool = True) -> E: 532 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 533 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 534 if ( 535 isinstance(expression, exp.Identifier) 536 and isinstance(expression.parent, exp.Table) 537 and expression.name.lower() == "dual" 538 ): 539 return expression # type: ignore 540 541 return super().quote_identifier(expression, identify=identify) 542 543 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 544 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 545 SINGLE_TOKENS.pop("$") 546 547 class Parser(parser.Parser): 548 IDENTIFY_PIVOT_STRINGS = True 549 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 550 COLON_IS_VARIANT_EXTRACT = True 551 552 ID_VAR_TOKENS = { 553 *parser.Parser.ID_VAR_TOKENS, 554 TokenType.MATCH_CONDITION, 555 } 556 557 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 558 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 559 560 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 561 562 FUNCTIONS = { 563 **parser.Parser.FUNCTIONS, 564 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 565 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 566 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 567 this=seq_get(args, 1), expression=seq_get(args, 0) 568 ), 569 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 570 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 571 start=seq_get(args, 0), 572 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 573 step=seq_get(args, 2), 574 ), 575 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 576 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 577 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 578 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 579 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 580 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 581 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 582 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 583 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 584 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 585 "DATE_TRUNC": _date_trunc_to_time, 586 "DATEADD": _build_date_time_add(exp.DateAdd), 587 "DATEDIFF": _build_datediff, 588 "DIV0": _build_if_from_div0, 589 "EDITDISTANCE": 
lambda args: exp.Levenshtein( 590 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 591 ), 592 "FLATTEN": exp.Explode.from_arg_list, 593 "GET_PATH": lambda args, dialect: exp.JSONExtract( 594 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 595 ), 596 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 597 "IFF": exp.If.from_arg_list, 598 "LAST_DAY": lambda args: exp.LastDay( 599 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 600 ), 601 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 602 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 603 "NULLIFZERO": _build_if_from_nullifzero, 604 "OBJECT_CONSTRUCT": _build_object_construct, 605 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 606 "REGEXP_REPLACE": _build_regexp_replace, 607 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 608 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 609 "REPLACE": build_replace_with_optional_replacement, 610 "RLIKE": exp.RegexpLike.from_arg_list, 611 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 612 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 613 "TIMEADD": _build_date_time_add(exp.TimeAdd), 614 "TIMEDIFF": _build_datediff, 615 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 616 "TIMESTAMPDIFF": _build_datediff, 617 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 618 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 619 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 620 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 621 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 622 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 623 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 624 "TRY_TO_TIMESTAMP": _build_datetime( 625 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 626 ), 627 "TO_CHAR": build_timetostr_or_tochar, 628 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 629 "TO_NUMBER": lambda args: exp.ToNumber( 630 this=seq_get(args, 0), 631 format=seq_get(args, 1), 632 precision=seq_get(args, 2), 633 scale=seq_get(args, 3), 634 ), 635 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 636 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 637 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 638 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 639 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 640 "TO_VARCHAR": exp.ToChar.from_arg_list, 641 "ZEROIFNULL": _build_if_from_zeroifnull, 642 } 643 644 FUNCTION_PARSERS = { 645 **parser.Parser.FUNCTION_PARSERS, 646 "DATE_PART": lambda self: self._parse_date_part(), 647 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 648 "LISTAGG": lambda self: self._parse_string_agg(), 649 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 650 } 651 FUNCTION_PARSERS.pop("TRIM") 652 653 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 654 655 RANGE_PARSERS = { 656 **parser.Parser.RANGE_PARSERS, 657 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 658 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 659 } 660 661 ALTER_PARSERS = { 662 **parser.Parser.ALTER_PARSERS, 663 "UNSET": lambda self: self.expression( 664 exp.Set, 665 
tag=self._match_text_seq("TAG"), 666 expressions=self._parse_csv(self._parse_id_var), 667 unset=True, 668 ), 669 } 670 671 STATEMENT_PARSERS = { 672 **parser.Parser.STATEMENT_PARSERS, 673 TokenType.GET: lambda self: self._parse_get(), 674 TokenType.PUT: lambda self: self._parse_put(), 675 TokenType.SHOW: lambda self: self._parse_show(), 676 } 677 678 PROPERTY_PARSERS = { 679 **parser.Parser.PROPERTY_PARSERS, 680 "CREDENTIALS": lambda self: self._parse_credentials_property(), 681 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 682 "LOCATION": lambda self: self._parse_location_property(), 683 "TAG": lambda self: self._parse_tag(), 684 "USING": lambda self: self._match_text_seq("TEMPLATE") 685 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 686 } 687 688 TYPE_CONVERTERS = { 689 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 690 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 691 } 692 693 SHOW_PARSERS = { 694 "DATABASES": _show_parser("DATABASES"), 695 "TERSE DATABASES": _show_parser("DATABASES"), 696 "SCHEMAS": _show_parser("SCHEMAS"), 697 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 698 "OBJECTS": _show_parser("OBJECTS"), 699 "TERSE OBJECTS": _show_parser("OBJECTS"), 700 "TABLES": _show_parser("TABLES"), 701 "TERSE TABLES": _show_parser("TABLES"), 702 "VIEWS": _show_parser("VIEWS"), 703 "TERSE VIEWS": _show_parser("VIEWS"), 704 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 706 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 708 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 710 "SEQUENCES": _show_parser("SEQUENCES"), 711 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 712 "STAGES": _show_parser("STAGES"), 713 "COLUMNS": _show_parser("COLUMNS"), 714 "USERS": _show_parser("USERS"), 715 "TERSE USERS": _show_parser("USERS"), 716 "FILE FORMATS": _show_parser("FILE FORMATS"), 717 "FUNCTIONS": _show_parser("FUNCTIONS"), 718 "PROCEDURES": _show_parser("PROCEDURES"), 719 "WAREHOUSES": _show_parser("WAREHOUSES"), 720 } 721 722 CONSTRAINT_PARSERS = { 723 **parser.Parser.CONSTRAINT_PARSERS, 724 "WITH": lambda self: self._parse_with_constraint(), 725 "MASKING": lambda self: self._parse_with_constraint(), 726 "PROJECTION": lambda self: self._parse_with_constraint(), 727 "TAG": lambda self: self._parse_with_constraint(), 728 } 729 730 STAGED_FILE_SINGLE_TOKENS = { 731 TokenType.DOT, 732 TokenType.MOD, 733 TokenType.SLASH, 734 } 735 736 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 737 738 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 739 740 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 741 742 LAMBDAS = { 743 **parser.Parser.LAMBDAS, 744 TokenType.ARROW: lambda self, expressions: self.expression( 745 exp.Lambda, 746 this=self._replace_lambda( 747 self._parse_assignment(), 748 expressions, 749 ), 750 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 751 ), 752 } 753 754 def _parse_use(self) -> exp.Use: 755 if self._match_text_seq("SECONDARY", "ROLES"): 756 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 757 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 758 return self.expression( 759 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 760 ) 761 762 
return super()._parse_use() 763 764 def _negate_range( 765 self, this: t.Optional[exp.Expression] = None 766 ) -> t.Optional[exp.Expression]: 767 if not this: 768 return this 769 770 query = this.args.get("query") 771 if isinstance(this, exp.In) and isinstance(query, exp.Query): 772 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 773 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 774 # which can produce different results (most likely a SnowFlake bug). 775 # 776 # https://docs.snowflake.com/en/sql-reference/functions/in 777 # Context: https://github.com/tobymao/sqlglot/issues/3890 778 return self.expression( 779 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 780 ) 781 782 return self.expression(exp.Not, this=this) 783 784 def _parse_tag(self) -> exp.Tags: 785 return self.expression( 786 exp.Tags, 787 expressions=self._parse_wrapped_csv(self._parse_property), 788 ) 789 790 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 791 if self._prev.token_type != TokenType.WITH: 792 self._retreat(self._index - 1) 793 794 if self._match_text_seq("MASKING", "POLICY"): 795 policy = self._parse_column() 796 return self.expression( 797 exp.MaskingPolicyColumnConstraint, 798 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 799 expressions=self._match(TokenType.USING) 800 and self._parse_wrapped_csv(self._parse_id_var), 801 ) 802 if self._match_text_seq("PROJECTION", "POLICY"): 803 policy = self._parse_column() 804 return self.expression( 805 exp.ProjectionPolicyColumnConstraint, 806 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 807 ) 808 if self._match(TokenType.TAG): 809 return self._parse_tag() 810 811 return None 812 813 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 814 if self._match(TokenType.TAG): 815 return self._parse_tag() 816 817 return super()._parse_with_property() 818 819 def _parse_create(self) -> exp.Create | exp.Command: 820 expression = super()._parse_create() 821 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 822 # Replace the Table node with the enclosed Identifier 823 expression.this.replace(expression.this.this) 824 825 return expression 826 827 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 828 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 829 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 830 this = self._parse_var() or self._parse_type() 831 832 if not this: 833 return None 834 835 self._match(TokenType.COMMA) 836 expression = self._parse_bitwise() 837 this = map_date_part(this) 838 name = this.name.upper() 839 840 if name.startswith("EPOCH"): 841 if name == "EPOCH_MILLISECOND": 842 scale = 10**3 843 elif name == "EPOCH_MICROSECOND": 844 scale = 10**6 845 elif name == "EPOCH_NANOSECOND": 846 scale = 10**9 847 else: 848 scale = None 849 850 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 851 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 852 853 if scale: 854 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 855 856 return to_unix 857 858 return self.expression(exp.Extract, this=this, expression=expression) 859 860 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 861 if is_map: 862 # Keys are strings in Snowflake's objects, see also: 863 # - 
https://docs.snowflake.com/en/sql-reference/data-types-semistructured 864 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 865 return self._parse_slice(self._parse_string()) 866 867 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 868 869 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 870 lateral = super()._parse_lateral() 871 if not lateral: 872 return lateral 873 874 if isinstance(lateral.this, exp.Explode): 875 table_alias = lateral.args.get("alias") 876 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 877 if table_alias and not table_alias.args.get("columns"): 878 table_alias.set("columns", columns) 879 elif not table_alias: 880 exp.alias_(lateral, "_flattened", table=columns, copy=False) 881 882 return lateral 883 884 def _parse_table_parts( 885 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 886 ) -> exp.Table: 887 # https://docs.snowflake.com/en/user-guide/querying-stage 888 if self._match(TokenType.STRING, advance=False): 889 table = self._parse_string() 890 elif self._match_text_seq("@", advance=False): 891 table = self._parse_location_path() 892 else: 893 table = None 894 895 if table: 896 file_format = None 897 pattern = None 898 899 wrapped = self._match(TokenType.L_PAREN) 900 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 901 if self._match_text_seq("FILE_FORMAT", "=>"): 902 file_format = self._parse_string() or super()._parse_table_parts( 903 is_db_reference=is_db_reference 904 ) 905 elif self._match_text_seq("PATTERN", "=>"): 906 pattern = self._parse_string() 907 else: 908 break 909 910 self._match(TokenType.COMMA) 911 912 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 913 else: 914 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 915 916 return table 917 918 def _parse_table( 919 self, 920 schema: bool = False, 921 joins: bool = False, 922 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 923 parse_bracket: bool = False, 924 is_db_reference: bool = False, 925 parse_partition: bool = False, 926 consume_pipe: bool = False, 927 ) -> t.Optional[exp.Expression]: 928 table = super()._parse_table( 929 schema=schema, 930 joins=joins, 931 alias_tokens=alias_tokens, 932 parse_bracket=parse_bracket, 933 is_db_reference=is_db_reference, 934 parse_partition=parse_partition, 935 ) 936 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 937 table_from_rows = table.this 938 for arg in exp.TableFromRows.arg_types: 939 if arg != "this": 940 table_from_rows.set(arg, table.args.get(arg)) 941 942 table = table_from_rows 943 944 return table 945 946 def _parse_id_var( 947 self, 948 any_token: bool = True, 949 tokens: t.Optional[t.Collection[TokenType]] = None, 950 ) -> t.Optional[exp.Expression]: 951 if self._match_text_seq("IDENTIFIER", "("): 952 identifier = ( 953 super()._parse_id_var(any_token=any_token, tokens=tokens) 954 or self._parse_string() 955 ) 956 self._match_r_paren() 957 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 958 959 return super()._parse_id_var(any_token=any_token, tokens=tokens) 960 961 def _parse_show_snowflake(self, this: str) -> exp.Show: 962 scope = None 963 scope_kind = None 964 965 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 966 # which is syntactically valid but has no effect on the output 967 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 968 969 
history = self._match_text_seq("HISTORY") 970 971 like = self._parse_string() if self._match(TokenType.LIKE) else None 972 973 if self._match(TokenType.IN): 974 if self._match_text_seq("ACCOUNT"): 975 scope_kind = "ACCOUNT" 976 elif self._match_text_seq("CLASS"): 977 scope_kind = "CLASS" 978 scope = self._parse_table_parts() 979 elif self._match_text_seq("APPLICATION"): 980 scope_kind = "APPLICATION" 981 if self._match_text_seq("PACKAGE"): 982 scope_kind += " PACKAGE" 983 scope = self._parse_table_parts() 984 elif self._match_set(self.DB_CREATABLES): 985 scope_kind = self._prev.text.upper() 986 if self._curr: 987 scope = self._parse_table_parts() 988 elif self._curr: 989 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 990 scope = self._parse_table_parts() 991 992 return self.expression( 993 exp.Show, 994 **{ 995 "terse": terse, 996 "this": this, 997 "history": history, 998 "like": like, 999 "scope": scope, 1000 "scope_kind": scope_kind, 1001 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1002 "limit": self._parse_limit(), 1003 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1004 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1005 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1006 }, 1007 ) 1008 1009 def _parse_put(self) -> exp.Put | exp.Command: 1010 if self._curr.token_type != TokenType.STRING: 1011 return self._parse_as_command(self._prev) 1012 1013 return self.expression( 1014 exp.Put, 1015 this=self._parse_string(), 1016 target=self._parse_location_path(), 1017 properties=self._parse_properties(), 1018 ) 1019 1020 def _parse_get(self) -> t.Optional[exp.Expression]: 1021 start = self._prev 1022 1023 # If we detect GET( then we need to parse a function, not a statement 1024 if self._match(TokenType.L_PAREN): 1025 self._retreat(self._index - 2) 1026 return self._parse_expression() 1027 1028 target = self._parse_location_path() 1029 1030 # Parse as command if unquoted file path 1031 if self._curr.token_type == TokenType.URI_START: 1032 return self._parse_as_command(start) 1033 1034 return self.expression( 1035 exp.Get, 1036 this=self._parse_string(), 1037 target=target, 1038 properties=self._parse_properties(), 1039 ) 1040 1041 def _parse_location_property(self) -> exp.LocationProperty: 1042 self._match(TokenType.EQ) 1043 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1044 1045 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1046 # Parse either a subquery or a staged file 1047 return ( 1048 self._parse_select(table=True, parse_subquery_alias=False) 1049 if self._match(TokenType.L_PAREN, advance=False) 1050 else self._parse_table_parts() 1051 ) 1052 1053 def _parse_location_path(self) -> exp.Var: 1054 start = self._curr 1055 self._advance_any(ignore_reserved=True) 1056 1057 # We avoid consuming a comma token because external tables like @foo and @bar 1058 # can be joined in a query with a comma separator, as well as closing paren 1059 # in case of subqueries 1060 while self._is_connected() and not self._match_set( 1061 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1062 ): 1063 self._advance_any(ignore_reserved=True) 1064 1065 return exp.var(self._find_sql(start, self._prev)) 1066 1067 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1068 this = super()._parse_lambda_arg() 1069 1070 if not this: 1071 return this 1072 1073 typ = self._parse_types() 1074 1075 if typ: 1076 return self.expression(exp.Cast, 
this=this, to=typ) 1077 1078 return this 1079 1080 def _parse_foreign_key(self) -> exp.ForeignKey: 1081 # inlineFK, the REFERENCES columns are implied 1082 if self._match(TokenType.REFERENCES, advance=False): 1083 return self.expression(exp.ForeignKey) 1084 1085 # outoflineFK, explicitly names the columns 1086 return super()._parse_foreign_key() 1087 1088 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1089 self._match(TokenType.EQ) 1090 if self._match(TokenType.L_PAREN, advance=False): 1091 expressions = self._parse_wrapped_options() 1092 else: 1093 expressions = [self._parse_format_name()] 1094 1095 return self.expression( 1096 exp.FileFormatProperty, 1097 expressions=expressions, 1098 ) 1099 1100 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1101 return self.expression( 1102 exp.CredentialsProperty, 1103 expressions=self._parse_wrapped_options(), 1104 ) 1105 1106 def _parse_semantic_view(self) -> exp.SemanticView: 1107 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1108 1109 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1110 if self._match_text_seq("DIMENSIONS"): 1111 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1112 if self._match_text_seq("METRICS"): 1113 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1114 if self._match_text_seq("WHERE"): 1115 kwargs["where"] = self._parse_expression() 1116 1117 return self.expression(exp.SemanticView, **kwargs) 1118 1119 class Tokenizer(tokens.Tokenizer): 1120 STRING_ESCAPES = ["\\", "'"] 1121 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1122 RAW_STRINGS = ["$$"] 1123 COMMENTS = ["--", "//", ("/*", "*/")] 1124 NESTED_COMMENTS = False 1125 1126 KEYWORDS = { 1127 **tokens.Tokenizer.KEYWORDS, 1128 "FILE://": TokenType.URI_START, 1129 "BYTEINT": TokenType.INT, 1130 "EXCLUDE": TokenType.EXCEPT, 1131 "FILE FORMAT": TokenType.FILE_FORMAT, 1132 "GET": TokenType.GET, 1133 "ILIKE ANY": TokenType.ILIKE_ANY, 1134 "LIKE ANY": TokenType.LIKE_ANY, 1135 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1136 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1137 "MINUS": TokenType.EXCEPT, 1138 "NCHAR VARYING": TokenType.VARCHAR, 1139 "PUT": TokenType.PUT, 1140 "REMOVE": TokenType.COMMAND, 1141 "RM": TokenType.COMMAND, 1142 "SAMPLE": TokenType.TABLE_SAMPLE, 1143 "SQL_DOUBLE": TokenType.DOUBLE, 1144 "SQL_VARCHAR": TokenType.VARCHAR, 1145 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1146 "TAG": TokenType.TAG, 1147 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1148 "TOP": TokenType.TOP, 1149 "WAREHOUSE": TokenType.WAREHOUSE, 1150 "STAGE": TokenType.STAGE, 1151 "STREAMLIT": TokenType.STREAMLIT, 1152 } 1153 KEYWORDS.pop("/*+") 1154 1155 SINGLE_TOKENS = { 1156 **tokens.Tokenizer.SINGLE_TOKENS, 1157 "$": TokenType.PARAMETER, 1158 } 1159 1160 VAR_SINGLE_TOKENS = {"$"} 1161 1162 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1163 1164 class Generator(generator.Generator): 1165 PARAMETER_TOKEN = "$" 1166 MATCHED_BY_SOURCE = False 1167 SINGLE_STRING_INTERVAL = True 1168 JOIN_HINTS = False 1169 TABLE_HINTS = False 1170 QUERY_HINTS = False 1171 AGGREGATE_FILTER_SUPPORTED = False 1172 SUPPORTS_TABLE_COPY = False 1173 COLLATE_IS_FUNC = True 1174 LIMIT_ONLY_LITERALS = True 1175 JSON_KEY_VALUE_PAIR_SEP = "," 1176 INSERT_OVERWRITE = " OVERWRITE INTO" 1177 STRUCT_DELIMITER = ("(", ")") 1178 COPY_PARAMS_ARE_WRAPPED = False 1179 COPY_PARAMS_EQ_REQUIRED = True 1180 STAR_EXCEPT = "EXCLUDE" 1181 SUPPORTS_EXPLODING_PROJECTIONS = False 1182 ARRAY_CONCAT_IS_VAR_LEN = False 1183 
SUPPORTS_CONVERT_TIMEZONE = True 1184 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1185 SUPPORTS_MEDIAN = True 1186 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1187 SUPPORTS_DECODE_CASE = True 1188 1189 TRANSFORMS = { 1190 **generator.Generator.TRANSFORMS, 1191 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1192 exp.ArgMax: rename_func("MAX_BY"), 1193 exp.ArgMin: rename_func("MIN_BY"), 1194 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1195 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1196 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1197 exp.AtTimeZone: lambda self, e: self.func( 1198 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1199 ), 1200 exp.BitwiseOr: rename_func("BITOR"), 1201 exp.BitwiseXor: rename_func("BITXOR"), 1202 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1203 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1204 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1205 exp.DateAdd: date_delta_sql("DATEADD"), 1206 exp.DateDiff: date_delta_sql("DATEDIFF"), 1207 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1208 exp.DatetimeDiff: timestampdiff_sql, 1209 exp.DateStrToDate: datestrtodate_sql, 1210 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1211 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1212 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1213 exp.DayOfYear: rename_func("DAYOFYEAR"), 1214 exp.Explode: rename_func("FLATTEN"), 1215 exp.Extract: lambda self, e: self.func( 1216 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1217 ), 1218 exp.FileFormatProperty: lambda self, 1219 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1220 exp.FromTimeZone: lambda self, e: self.func( 1221 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1222 ), 1223 exp.GenerateSeries: lambda self, e: self.func( 1224 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1225 ), 1226 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1227 exp.If: if_sql(name="IFF", false_value="NULL"), 1228 exp.JSONExtractArray: _json_extract_value_array_sql, 1229 exp.JSONExtractScalar: lambda self, e: self.func( 1230 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1231 ), 1232 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1233 exp.JSONPathRoot: lambda *_: "", 1234 exp.JSONValueArray: _json_extract_value_array_sql, 1235 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1236 rename_func("EDITDISTANCE") 1237 ), 1238 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1239 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1240 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1241 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1242 exp.MakeInterval: no_make_interval_sql, 1243 exp.Max: max_or_greatest, 1244 exp.Min: min_or_least, 1245 exp.ParseJSON: lambda self, e: self.func( 1246 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1247 ), 1248 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1249 exp.PercentileCont: transforms.preprocess( 1250 [transforms.add_within_group_for_percentiles] 1251 ), 1252 exp.PercentileDisc: transforms.preprocess( 1253 [transforms.add_within_group_for_percentiles] 1254 ), 1255 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1256 exp.RegexpExtract: _regexpextract_sql, 1257 exp.RegexpExtractAll: _regexpextract_sql, 1258 exp.RegexpILike: _regexpilike_sql, 1259 
exp.Rand: rename_func("RANDOM"), 1260 exp.Select: transforms.preprocess( 1261 [ 1262 transforms.eliminate_window_clause, 1263 transforms.eliminate_distinct_on, 1264 transforms.explode_projection_to_unnest(), 1265 transforms.eliminate_semi_and_anti_joins, 1266 _transform_generate_date_array, 1267 _qualify_unnested_columns, 1268 _eliminate_dot_variant_lookup, 1269 ] 1270 ), 1271 exp.SHA: rename_func("SHA1"), 1272 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1273 exp.StartsWith: rename_func("STARTSWITH"), 1274 exp.EndsWith: rename_func("ENDSWITH"), 1275 exp.StrPosition: lambda self, e: strposition_sql( 1276 self, e, func_name="CHARINDEX", supports_position=True 1277 ), 1278 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1279 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1280 exp.Stuff: rename_func("INSERT"), 1281 exp.StPoint: rename_func("ST_MAKEPOINT"), 1282 exp.TimeAdd: date_delta_sql("TIMEADD"), 1283 exp.Timestamp: no_timestamp_sql, 1284 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1285 exp.TimestampDiff: lambda self, e: self.func( 1286 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1287 ), 1288 exp.TimestampTrunc: timestamptrunc_sql(), 1289 exp.TimeStrToTime: timestrtotime_sql, 1290 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1291 exp.ToArray: rename_func("TO_ARRAY"), 1292 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1293 exp.ToDouble: rename_func("TO_DOUBLE"), 1294 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1295 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1296 exp.TsOrDsToDate: lambda self, e: self.func( 1297 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1298 ), 1299 exp.TsOrDsToTime: lambda self, e: self.func( 1300 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1301 ), 1302 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1303 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1304 exp.Uuid: rename_func("UUID_STRING"), 1305 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1306 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1307 exp.Xor: rename_func("BOOLXOR"), 1308 } 1309 1310 SUPPORTED_JSON_PATH_PARTS = { 1311 exp.JSONPathKey, 1312 exp.JSONPathRoot, 1313 exp.JSONPathSubscript, 1314 } 1315 1316 TYPE_MAPPING = { 1317 **generator.Generator.TYPE_MAPPING, 1318 exp.DataType.Type.NESTED: "OBJECT", 1319 exp.DataType.Type.STRUCT: "OBJECT", 1320 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1321 } 1322 1323 TOKEN_MAPPING = { 1324 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1325 } 1326 1327 PROPERTIES_LOCATION = { 1328 **generator.Generator.PROPERTIES_LOCATION, 1329 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1330 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1331 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1332 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1333 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1334 } 1335 1336 UNSUPPORTED_VALUES_EXPRESSIONS = { 1337 exp.Map, 1338 exp.StarMap, 1339 exp.Struct, 1340 exp.VarMap, 1341 } 1342 1343 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1344 1345 def with_properties(self, properties: exp.Properties) -> str: 1346 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1347 1348 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1349 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1350 values_as_table = False 1351 1352 return 
super().values_sql(expression, values_as_table=values_as_table) 1353 1354 def datatype_sql(self, expression: exp.DataType) -> str: 1355 expressions = expression.expressions 1356 if ( 1357 expressions 1358 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1359 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1360 ): 1361 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1362 return "OBJECT" 1363 1364 return super().datatype_sql(expression) 1365 1366 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1367 return self.func( 1368 "TO_NUMBER", 1369 expression.this, 1370 expression.args.get("format"), 1371 expression.args.get("precision"), 1372 expression.args.get("scale"), 1373 ) 1374 1375 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1376 milli = expression.args.get("milli") 1377 if milli is not None: 1378 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1379 expression.set("nano", milli_to_nano) 1380 1381 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1382 1383 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1384 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1385 return self.func("TO_GEOGRAPHY", expression.this) 1386 if expression.is_type(exp.DataType.Type.GEOMETRY): 1387 return self.func("TO_GEOMETRY", expression.this) 1388 1389 return super().cast_sql(expression, safe_prefix=safe_prefix) 1390 1391 def trycast_sql(self, expression: exp.TryCast) -> str: 1392 value = expression.this 1393 1394 if value.type is None: 1395 from sqlglot.optimizer.annotate_types import annotate_types 1396 1397 value = annotate_types(value, dialect=self.dialect) 1398 1399 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1400 return super().trycast_sql(expression) 1401 1402 # TRY_CAST only works for string values in Snowflake 1403 return self.cast_sql(expression) 1404 1405 def log_sql(self, expression: exp.Log) -> str: 1406 if not expression.expression: 1407 return self.func("LN", expression.this) 1408 1409 return super().log_sql(expression) 1410 1411 def unnest_sql(self, expression: exp.Unnest) -> str: 1412 unnest_alias = expression.args.get("alias") 1413 offset = expression.args.get("offset") 1414 1415 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1416 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1417 1418 columns = [ 1419 exp.to_identifier("seq"), 1420 exp.to_identifier("key"), 1421 exp.to_identifier("path"), 1422 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1423 value, 1424 exp.to_identifier("this"), 1425 ] 1426 1427 if unnest_alias: 1428 unnest_alias.set("columns", columns) 1429 else: 1430 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1431 1432 table_input = self.sql(expression.expressions[0]) 1433 if not table_input.startswith("INPUT =>"): 1434 table_input = f"INPUT => {table_input}" 1435 1436 expression_parent = expression.parent 1437 1438 explode = ( 1439 f"FLATTEN({table_input})" 1440 if isinstance(expression_parent, exp.Lateral) 1441 else f"TABLE(FLATTEN({table_input}))" 1442 ) 1443 alias = self.sql(unnest_alias) 1444 alias = f" AS {alias}" if alias else "" 1445 value = ( 1446 "" 1447 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1448 else f"{value} FROM " 1449 ) 1450 1451 return f"{value}{explode}{alias}" 1452 1453 def show_sql(self, expression: exp.Show) -> str: 1454 terse = "TERSE " if 
expression.args.get("terse") else "" 1455 history = " HISTORY" if expression.args.get("history") else "" 1456 like = self.sql(expression, "like") 1457 like = f" LIKE {like}" if like else "" 1458 1459 scope = self.sql(expression, "scope") 1460 scope = f" {scope}" if scope else "" 1461 1462 scope_kind = self.sql(expression, "scope_kind") 1463 if scope_kind: 1464 scope_kind = f" IN {scope_kind}" 1465 1466 starts_with = self.sql(expression, "starts_with") 1467 if starts_with: 1468 starts_with = f" STARTS WITH {starts_with}" 1469 1470 limit = self.sql(expression, "limit") 1471 1472 from_ = self.sql(expression, "from") 1473 if from_: 1474 from_ = f" FROM {from_}" 1475 1476 privileges = self.expressions(expression, key="privileges", flat=True) 1477 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1478 1479 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1480 1481 def describe_sql(self, expression: exp.Describe) -> str: 1482 # Default to table if kind is unknown 1483 kind_value = expression.args.get("kind") or "TABLE" 1484 kind = f" {kind_value}" if kind_value else "" 1485 this = f" {self.sql(expression, 'this')}" 1486 expressions = self.expressions(expression, flat=True) 1487 expressions = f" {expressions}" if expressions else "" 1488 return f"DESCRIBE{kind}{this}{expressions}" 1489 1490 def generatedasidentitycolumnconstraint_sql( 1491 self, expression: exp.GeneratedAsIdentityColumnConstraint 1492 ) -> str: 1493 start = expression.args.get("start") 1494 start = f" START {start}" if start else "" 1495 increment = expression.args.get("increment") 1496 increment = f" INCREMENT {increment}" if increment else "" 1497 1498 order = expression.args.get("order") 1499 if order is not None: 1500 order_clause = " ORDER" if order else " NOORDER" 1501 else: 1502 order_clause = "" 1503 1504 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1505 1506 def cluster_sql(self, expression: exp.Cluster) -> str: 1507 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1508 1509 def struct_sql(self, expression: exp.Struct) -> str: 1510 keys = [] 1511 values = [] 1512 1513 for i, e in enumerate(expression.expressions): 1514 if isinstance(e, exp.PropertyEQ): 1515 keys.append( 1516 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1517 ) 1518 values.append(e.expression) 1519 else: 1520 keys.append(exp.Literal.string(f"_{i}")) 1521 values.append(e) 1522 1523 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1524 1525 @unsupported_args("weight", "accuracy") 1526 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1527 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1528 1529 def alterset_sql(self, expression: exp.AlterSet) -> str: 1530 exprs = self.expressions(expression, flat=True) 1531 exprs = f" {exprs}" if exprs else "" 1532 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1533 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1534 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1535 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1536 tag = self.expressions(expression, key="tag", flat=True) 1537 tag = f" TAG {tag}" if tag else "" 1538 1539 return f"SET{exprs}{file_format}{copy_options}{tag}" 1540 1541 def strtotime_sql(self, expression: exp.StrToTime): 1542 safe_prefix = "TRY_" if 
expression.args.get("safe") else "" 1543 return self.func( 1544 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1545 ) 1546 1547 def timestampsub_sql(self, expression: exp.TimestampSub): 1548 return self.sql( 1549 exp.TimestampAdd( 1550 this=expression.this, 1551 expression=expression.expression * -1, 1552 unit=expression.unit, 1553 ) 1554 ) 1555 1556 def jsonextract_sql(self, expression: exp.JSONExtract): 1557 this = expression.this 1558 1559 # JSON strings are valid coming from other dialects such as BQ 1560 return self.func( 1561 "GET_PATH", 1562 exp.ParseJSON(this=this) if this.is_string else this, 1563 expression.expression, 1564 ) 1565 1566 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1567 this = expression.this 1568 if this.is_string: 1569 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1570 1571 return self.func("TO_CHAR", this, self.format_time(expression)) 1572 1573 def datesub_sql(self, expression: exp.DateSub) -> str: 1574 value = expression.expression 1575 if value: 1576 value.replace(value * (-1)) 1577 else: 1578 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1579 1580 return date_delta_sql("DATEADD")(self, expression) 1581 1582 def select_sql(self, expression: exp.Select) -> str: 1583 limit = expression.args.get("limit") 1584 offset = expression.args.get("offset") 1585 if offset and not limit: 1586 expression.limit(exp.Null(), copy=False) 1587 return super().select_sql(expression) 1588 1589 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1590 is_materialized = expression.find(exp.MaterializedProperty) 1591 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1592 1593 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1594 # For materialized views, COPY GRANTS is located *before* the columns list 1595 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1596 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1597 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1598 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1599 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1600 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1601 1602 this_name = self.sql(expression.this, "this") 1603 copy_grants = self.sql(copy_grants_property) 1604 this_schema = self.schema_columns_sql(expression.this) 1605 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1606 1607 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1608 1609 return super().createable_sql(expression, locations) 1610 1611 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1612 this = expression.this 1613 1614 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1615 # and add it later as part of the WITHIN GROUP clause 1616 order = this if isinstance(this, exp.Order) else None 1617 if order: 1618 expression.set("this", order.this.pop()) 1619 1620 expr_sql = super().arrayagg_sql(expression) 1621 1622 if order: 1623 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1624 1625 return expr_sql 1626 1627 def array_sql(self, expression: exp.Array) -> str: 1628 expressions = expression.expressions 1629 1630 first_expr = seq_get(expressions, 0) 1631 if isinstance(first_expr, exp.Select): 1632 # 
SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1633 if first_expr.text("kind").upper() == "STRUCT": 1634 object_construct_args = [] 1635 for expr in first_expr.expressions: 1636 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1637 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1638 name = expr.this if isinstance(expr, exp.Alias) else expr 1639 1640 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1641 1642 array_agg = exp.ArrayAgg( 1643 this=_build_object_construct(args=object_construct_args) 1644 ) 1645 1646 first_expr.set("kind", None) 1647 first_expr.set("expressions", [array_agg]) 1648 1649 return self.sql(first_expr.subquery()) 1650 1651 return inline_array_sql(self, expression) 1652 1653 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1654 zone = self.sql(expression, "this") 1655 if not zone: 1656 return super().currentdate_sql(expression) 1657 1658 expr = exp.Cast( 1659 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1660 to=exp.DataType(this=exp.DataType.Type.DATE), 1661 ) 1662 return self.sql(expr)
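The class source above wires a large TRANSFORMS table into the generator; many entries are simple renames. A minimal sketch of the effect (outputs approximate):

    import sqlglot

    # exp.Levenshtein -> EDITDISTANCE and exp.StartsWith -> STARTSWITH, per TRANSFORMS
    print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b), STARTS_WITH(s, 'x')", write="snowflake")[0])
    # roughly: SELECT EDITDISTANCE(a, b), STARTSWITH(s, 'x')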
NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
Specifies the strategy according to which identifiers should be normalized.
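In practice this means unquoted identifiers resolve to uppercase, while quoted ones keep their case. A minimal sketch using sqlglot's identifier normalizer:

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    # Unquoted identifiers are uppercased; quoted identifiers are preserved as-is
    expr = sqlglot.parse_one('SELECT a, "b" FROM t', read="snowflake")
    print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
    # SELECT A, "b" FROM T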
NULL_ORDERING = "nulls_are_large"
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN = True
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
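This rewrite is performed by sqlglot's column qualifier; a hedged sketch of triggering it programmatically (qualify also normalizes and quotes identifiers, so expect the rewrite above modulo casing and quotes):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    expr = qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake")
    print(expr.sql(dialect="snowflake"))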
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
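This mapping is what lets format strings survive transpilation: Snowflake tokens are converted to strftime internally, then the target dialect's own mapping is applied on the way out. A sketch (output approximate):

    import sqlglot

    # 'YYYY-MM-DD' -> '%Y-%m-%d' internally, then rendered in the target dialect
    print(sqlglot.transpile("SELECT TO_CHAR(col, 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])
    # roughly: SELECT STRFTIME(col, '%Y-%m-%d')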
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
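A small sketch of the DUAL carve-out (names here are illustrative):

    import sqlglot
    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    snowflake = Snowflake()

    # `dual` used as a table reference is returned unquoted...
    dual = sqlglot.parse_one("SELECT 1 FROM dual", read="snowflake").find(exp.Table).this
    print(snowflake.quote_identifier(dual).sql(dialect="snowflake"))  # dual

    # ...while an ordinary identifier is quoted (identify defaults to True)
    print(snowflake.quote_identifier(exp.to_identifier("my_col")).sql(dialect="snowflake"))  # "my_col"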
UNESCAPED_SEQUENCES
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
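For intuition, the shape of such a mapping (values here are illustrative):

    # e.g. the two characters "\" + "n" in a raw token map to a real newline
    UNESCAPED_SEQUENCES = {"\\n": "\n", "\\t": "\t"}
    assert UNESCAPED_SEQUENCES["\\n"] == "\n"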
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
    SINGLE_TOKENS.pop("$")
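Popping "$" reflects that Snowflake JSON paths carry no leading root marker; together with the generator rendering exp.JSONPathRoot as an empty string, a path from another dialect loses its "$." prefix. A hedged sketch:

    import sqlglot

    print(sqlglot.transpile("SELECT JSON_EXTRACT(col, '$.a.b')", read="mysql", write="snowflake")[0])
    # roughly: SELECT GET_PATH(col, 'a.b')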
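The Parser class shown next layers Snowflake-only statements (SHOW, PUT, GET) and function rewrites on top of the base parser. A sketch of the SHOW round trip (output approximate):

    import sqlglot

    show = sqlglot.parse_one("SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db.s", read="snowflake")
    print(type(show).__name__)            # Show
    print(show.sql(dialect="snowflake"))  # SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db.s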
547 class Parser(parser.Parser): 548 IDENTIFY_PIVOT_STRINGS = True 549 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 550 COLON_IS_VARIANT_EXTRACT = True 551 552 ID_VAR_TOKENS = { 553 *parser.Parser.ID_VAR_TOKENS, 554 TokenType.MATCH_CONDITION, 555 } 556 557 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 558 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 559 560 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 561 562 FUNCTIONS = { 563 **parser.Parser.FUNCTIONS, 564 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 565 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 566 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 567 this=seq_get(args, 1), expression=seq_get(args, 0) 568 ), 569 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 570 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 571 start=seq_get(args, 0), 572 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 573 step=seq_get(args, 2), 574 ), 575 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 576 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 577 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 578 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 579 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 580 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 581 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 582 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 583 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 584 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 585 "DATE_TRUNC": _date_trunc_to_time, 586 "DATEADD": _build_date_time_add(exp.DateAdd), 587 "DATEDIFF": _build_datediff, 588 "DIV0": _build_if_from_div0, 589 "EDITDISTANCE": lambda args: exp.Levenshtein( 590 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 591 ), 592 "FLATTEN": exp.Explode.from_arg_list, 593 "GET_PATH": lambda args, dialect: exp.JSONExtract( 594 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 595 ), 596 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 597 "IFF": exp.If.from_arg_list, 598 "LAST_DAY": lambda args: exp.LastDay( 599 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 600 ), 601 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 602 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 603 "NULLIFZERO": _build_if_from_nullifzero, 604 "OBJECT_CONSTRUCT": _build_object_construct, 605 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 606 "REGEXP_REPLACE": _build_regexp_replace, 607 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 608 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 609 "REPLACE": build_replace_with_optional_replacement, 610 "RLIKE": exp.RegexpLike.from_arg_list, 611 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 612 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 613 "TIMEADD": _build_date_time_add(exp.TimeAdd), 614 "TIMEDIFF": _build_datediff, 615 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 616 "TIMESTAMPDIFF": _build_datediff, 617 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 618 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 619 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 620 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 621 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 
0), safe=True), 622 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 623 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 624 "TRY_TO_TIMESTAMP": _build_datetime( 625 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 626 ), 627 "TO_CHAR": build_timetostr_or_tochar, 628 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 629 "TO_NUMBER": lambda args: exp.ToNumber( 630 this=seq_get(args, 0), 631 format=seq_get(args, 1), 632 precision=seq_get(args, 2), 633 scale=seq_get(args, 3), 634 ), 635 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 636 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 637 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 638 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 639 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 640 "TO_VARCHAR": exp.ToChar.from_arg_list, 641 "ZEROIFNULL": _build_if_from_zeroifnull, 642 } 643 644 FUNCTION_PARSERS = { 645 **parser.Parser.FUNCTION_PARSERS, 646 "DATE_PART": lambda self: self._parse_date_part(), 647 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 648 "LISTAGG": lambda self: self._parse_string_agg(), 649 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 650 } 651 FUNCTION_PARSERS.pop("TRIM") 652 653 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 654 655 RANGE_PARSERS = { 656 **parser.Parser.RANGE_PARSERS, 657 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 658 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 659 } 660 661 ALTER_PARSERS = { 662 **parser.Parser.ALTER_PARSERS, 663 "UNSET": lambda self: self.expression( 664 exp.Set, 665 tag=self._match_text_seq("TAG"), 666 expressions=self._parse_csv(self._parse_id_var), 667 unset=True, 668 ), 669 } 670 671 STATEMENT_PARSERS = { 672 **parser.Parser.STATEMENT_PARSERS, 673 TokenType.GET: lambda self: self._parse_get(), 674 TokenType.PUT: lambda self: self._parse_put(), 675 TokenType.SHOW: lambda self: self._parse_show(), 676 } 677 678 PROPERTY_PARSERS = { 679 **parser.Parser.PROPERTY_PARSERS, 680 "CREDENTIALS": lambda self: self._parse_credentials_property(), 681 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 682 "LOCATION": lambda self: self._parse_location_property(), 683 "TAG": lambda self: self._parse_tag(), 684 "USING": lambda self: self._match_text_seq("TEMPLATE") 685 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 686 } 687 688 TYPE_CONVERTERS = { 689 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 690 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 691 } 692 693 SHOW_PARSERS = { 694 "DATABASES": _show_parser("DATABASES"), 695 "TERSE DATABASES": _show_parser("DATABASES"), 696 "SCHEMAS": _show_parser("SCHEMAS"), 697 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 698 "OBJECTS": _show_parser("OBJECTS"), 699 "TERSE OBJECTS": _show_parser("OBJECTS"), 700 "TABLES": _show_parser("TABLES"), 701 "TERSE TABLES": _show_parser("TABLES"), 702 "VIEWS": _show_parser("VIEWS"), 703 "TERSE VIEWS": _show_parser("VIEWS"), 704 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 706 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 708 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "TERSE UNIQUE 
KEYS": _show_parser("UNIQUE KEYS"), 710 "SEQUENCES": _show_parser("SEQUENCES"), 711 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 712 "STAGES": _show_parser("STAGES"), 713 "COLUMNS": _show_parser("COLUMNS"), 714 "USERS": _show_parser("USERS"), 715 "TERSE USERS": _show_parser("USERS"), 716 "FILE FORMATS": _show_parser("FILE FORMATS"), 717 "FUNCTIONS": _show_parser("FUNCTIONS"), 718 "PROCEDURES": _show_parser("PROCEDURES"), 719 "WAREHOUSES": _show_parser("WAREHOUSES"), 720 } 721 722 CONSTRAINT_PARSERS = { 723 **parser.Parser.CONSTRAINT_PARSERS, 724 "WITH": lambda self: self._parse_with_constraint(), 725 "MASKING": lambda self: self._parse_with_constraint(), 726 "PROJECTION": lambda self: self._parse_with_constraint(), 727 "TAG": lambda self: self._parse_with_constraint(), 728 } 729 730 STAGED_FILE_SINGLE_TOKENS = { 731 TokenType.DOT, 732 TokenType.MOD, 733 TokenType.SLASH, 734 } 735 736 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 737 738 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 739 740 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 741 742 LAMBDAS = { 743 **parser.Parser.LAMBDAS, 744 TokenType.ARROW: lambda self, expressions: self.expression( 745 exp.Lambda, 746 this=self._replace_lambda( 747 self._parse_assignment(), 748 expressions, 749 ), 750 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 751 ), 752 } 753 754 def _parse_use(self) -> exp.Use: 755 if self._match_text_seq("SECONDARY", "ROLES"): 756 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 757 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 758 return self.expression( 759 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 760 ) 761 762 return super()._parse_use() 763 764 def _negate_range( 765 self, this: t.Optional[exp.Expression] = None 766 ) -> t.Optional[exp.Expression]: 767 if not this: 768 return this 769 770 query = this.args.get("query") 771 if isinstance(this, exp.In) and isinstance(query, exp.Query): 772 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 773 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 774 # which can produce different results (most likely a SnowFlake bug). 
775 # 776 # https://docs.snowflake.com/en/sql-reference/functions/in 777 # Context: https://github.com/tobymao/sqlglot/issues/3890 778 return self.expression( 779 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 780 ) 781 782 return self.expression(exp.Not, this=this) 783 784 def _parse_tag(self) -> exp.Tags: 785 return self.expression( 786 exp.Tags, 787 expressions=self._parse_wrapped_csv(self._parse_property), 788 ) 789 790 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 791 if self._prev.token_type != TokenType.WITH: 792 self._retreat(self._index - 1) 793 794 if self._match_text_seq("MASKING", "POLICY"): 795 policy = self._parse_column() 796 return self.expression( 797 exp.MaskingPolicyColumnConstraint, 798 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 799 expressions=self._match(TokenType.USING) 800 and self._parse_wrapped_csv(self._parse_id_var), 801 ) 802 if self._match_text_seq("PROJECTION", "POLICY"): 803 policy = self._parse_column() 804 return self.expression( 805 exp.ProjectionPolicyColumnConstraint, 806 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 807 ) 808 if self._match(TokenType.TAG): 809 return self._parse_tag() 810 811 return None 812 813 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 814 if self._match(TokenType.TAG): 815 return self._parse_tag() 816 817 return super()._parse_with_property() 818 819 def _parse_create(self) -> exp.Create | exp.Command: 820 expression = super()._parse_create() 821 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 822 # Replace the Table node with the enclosed Identifier 823 expression.this.replace(expression.this.this) 824 825 return expression 826 827 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 828 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 829 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 830 this = self._parse_var() or self._parse_type() 831 832 if not this: 833 return None 834 835 self._match(TokenType.COMMA) 836 expression = self._parse_bitwise() 837 this = map_date_part(this) 838 name = this.name.upper() 839 840 if name.startswith("EPOCH"): 841 if name == "EPOCH_MILLISECOND": 842 scale = 10**3 843 elif name == "EPOCH_MICROSECOND": 844 scale = 10**6 845 elif name == "EPOCH_NANOSECOND": 846 scale = 10**9 847 else: 848 scale = None 849 850 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 851 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 852 853 if scale: 854 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 855 856 return to_unix 857 858 return self.expression(exp.Extract, this=this, expression=expression) 859 860 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 861 if is_map: 862 # Keys are strings in Snowflake's objects, see also: 863 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 864 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 865 return self._parse_slice(self._parse_string()) 866 867 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 868 869 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 870 lateral = super()._parse_lateral() 871 if not lateral: 872 return lateral 873 874 if isinstance(lateral.this, exp.Explode): 875 table_alias = lateral.args.get("alias") 
876 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 877 if table_alias and not table_alias.args.get("columns"): 878 table_alias.set("columns", columns) 879 elif not table_alias: 880 exp.alias_(lateral, "_flattened", table=columns, copy=False) 881 882 return lateral 883 884 def _parse_table_parts( 885 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 886 ) -> exp.Table: 887 # https://docs.snowflake.com/en/user-guide/querying-stage 888 if self._match(TokenType.STRING, advance=False): 889 table = self._parse_string() 890 elif self._match_text_seq("@", advance=False): 891 table = self._parse_location_path() 892 else: 893 table = None 894 895 if table: 896 file_format = None 897 pattern = None 898 899 wrapped = self._match(TokenType.L_PAREN) 900 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 901 if self._match_text_seq("FILE_FORMAT", "=>"): 902 file_format = self._parse_string() or super()._parse_table_parts( 903 is_db_reference=is_db_reference 904 ) 905 elif self._match_text_seq("PATTERN", "=>"): 906 pattern = self._parse_string() 907 else: 908 break 909 910 self._match(TokenType.COMMA) 911 912 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 913 else: 914 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 915 916 return table 917 918 def _parse_table( 919 self, 920 schema: bool = False, 921 joins: bool = False, 922 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 923 parse_bracket: bool = False, 924 is_db_reference: bool = False, 925 parse_partition: bool = False, 926 consume_pipe: bool = False, 927 ) -> t.Optional[exp.Expression]: 928 table = super()._parse_table( 929 schema=schema, 930 joins=joins, 931 alias_tokens=alias_tokens, 932 parse_bracket=parse_bracket, 933 is_db_reference=is_db_reference, 934 parse_partition=parse_partition, 935 ) 936 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 937 table_from_rows = table.this 938 for arg in exp.TableFromRows.arg_types: 939 if arg != "this": 940 table_from_rows.set(arg, table.args.get(arg)) 941 942 table = table_from_rows 943 944 return table 945 946 def _parse_id_var( 947 self, 948 any_token: bool = True, 949 tokens: t.Optional[t.Collection[TokenType]] = None, 950 ) -> t.Optional[exp.Expression]: 951 if self._match_text_seq("IDENTIFIER", "("): 952 identifier = ( 953 super()._parse_id_var(any_token=any_token, tokens=tokens) 954 or self._parse_string() 955 ) 956 self._match_r_paren() 957 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 958 959 return super()._parse_id_var(any_token=any_token, tokens=tokens) 960 961 def _parse_show_snowflake(self, this: str) -> exp.Show: 962 scope = None 963 scope_kind = None 964 965 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS, 966 # which is syntactically valid but has no effect on the output 967 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 968 969 history = self._match_text_seq("HISTORY") 970 971 like = self._parse_string() if self._match(TokenType.LIKE) else None 972 973 if self._match(TokenType.IN): 974 if self._match_text_seq("ACCOUNT"): 975 scope_kind = "ACCOUNT" 976 elif self._match_text_seq("CLASS"): 977 scope_kind = "CLASS" 978 scope = self._parse_table_parts() 979 elif self._match_text_seq("APPLICATION"): 980 scope_kind = "APPLICATION" 981 if self._match_text_seq("PACKAGE"): 982 scope_kind += " PACKAGE" 983 scope = self._parse_table_parts() 984 elif
self._match_set(self.DB_CREATABLES): 985 scope_kind = self._prev.text.upper() 986 if self._curr: 987 scope = self._parse_table_parts() 988 elif self._curr: 989 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 990 scope = self._parse_table_parts() 991 992 return self.expression( 993 exp.Show, 994 **{ 995 "terse": terse, 996 "this": this, 997 "history": history, 998 "like": like, 999 "scope": scope, 1000 "scope_kind": scope_kind, 1001 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1002 "limit": self._parse_limit(), 1003 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1004 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1005 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1006 }, 1007 ) 1008 1009 def _parse_put(self) -> exp.Put | exp.Command: 1010 if self._curr.token_type != TokenType.STRING: 1011 return self._parse_as_command(self._prev) 1012 1013 return self.expression( 1014 exp.Put, 1015 this=self._parse_string(), 1016 target=self._parse_location_path(), 1017 properties=self._parse_properties(), 1018 ) 1019 1020 def _parse_get(self) -> t.Optional[exp.Expression]: 1021 start = self._prev 1022 1023 # If we detect GET( then we need to parse a function, not a statement 1024 if self._match(TokenType.L_PAREN): 1025 self._retreat(self._index - 2) 1026 return self._parse_expression() 1027 1028 target = self._parse_location_path() 1029 1030 # Parse as command if unquoted file path 1031 if self._curr.token_type == TokenType.URI_START: 1032 return self._parse_as_command(start) 1033 1034 return self.expression( 1035 exp.Get, 1036 this=self._parse_string(), 1037 target=target, 1038 properties=self._parse_properties(), 1039 ) 1040 1041 def _parse_location_property(self) -> exp.LocationProperty: 1042 self._match(TokenType.EQ) 1043 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1044 1045 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1046 # Parse either a subquery or a staged file 1047 return ( 1048 self._parse_select(table=True, parse_subquery_alias=False) 1049 if self._match(TokenType.L_PAREN, advance=False) 1050 else self._parse_table_parts() 1051 ) 1052 1053 def _parse_location_path(self) -> exp.Var: 1054 start = self._curr 1055 self._advance_any(ignore_reserved=True) 1056 1057 # We avoid consuming a comma token because external tables like @foo and @bar 1058 # can be joined in a query with a comma separator, as well as closing paren 1059 # in case of subqueries 1060 while self._is_connected() and not self._match_set( 1061 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1062 ): 1063 self._advance_any(ignore_reserved=True) 1064 1065 return exp.var(self._find_sql(start, self._prev)) 1066 1067 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1068 this = super()._parse_lambda_arg() 1069 1070 if not this: 1071 return this 1072 1073 typ = self._parse_types() 1074 1075 if typ: 1076 return self.expression(exp.Cast, this=this, to=typ) 1077 1078 return this 1079 1080 def _parse_foreign_key(self) -> exp.ForeignKey: 1081 # inlineFK, the REFERENCES columns are implied 1082 if self._match(TokenType.REFERENCES, advance=False): 1083 return self.expression(exp.ForeignKey) 1084 1085 # outoflineFK, explicitly names the columns 1086 return super()._parse_foreign_key() 1087 1088 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1089 self._match(TokenType.EQ) 1090 if self._match(TokenType.L_PAREN, advance=False): 1091 
expressions = self._parse_wrapped_options() 1092 else: 1093 expressions = [self._parse_format_name()] 1094 1095 return self.expression( 1096 exp.FileFormatProperty, 1097 expressions=expressions, 1098 ) 1099 1100 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1101 return self.expression( 1102 exp.CredentialsProperty, 1103 expressions=self._parse_wrapped_options(), 1104 ) 1105 1106 def _parse_semantic_view(self) -> exp.SemanticView: 1107 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1108 1109 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1110 if self._match_text_seq("DIMENSIONS"): 1111 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1112 if self._match_text_seq("METRICS"): 1113 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1114 if self._match_text_seq("WHERE"): 1115 kwargs["where"] = self._parse_expression() 1116 1117 return self.expression(exp.SemanticView, **kwargs)
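To make the parser entry points above concrete, here is a minimal sketch (not part of the module; output strings are indicative, not authoritative). It shows two behaviours implemented in this class: `_parse_show_snowflake` turns SHOW statements into structured `exp.Show` nodes, and `_negate_range` rewrites `value NOT IN (subquery)` into `value <> ALL (subquery)`.

```python
import sqlglot
from sqlglot import exp

# SHOW becomes an exp.Show node rather than an opaque command
# (Snowflake's tokenizer removes SHOW from COMMANDS, see below).
show = sqlglot.parse_one("SHOW TERSE SCHEMAS IN ACCOUNT", read="snowflake")
assert isinstance(show, exp.Show)
print(show.args.get("terse"), show.args.get("scope_kind"))  # True, "ACCOUNT" (indicative)

# NOT IN over a subquery parses as `<> ALL (subquery)`.
sql = "SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)"
print(sqlglot.parse_one(sql, read="snowflake").sql("snowflake"))
# e.g. SELECT * FROM t WHERE x <> ALL (SELECT y FROM u)
```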
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
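A hedged sketch of these options in use: `parse_one` forwards extra keyword arguments to the Parser, so the error level can be tuned per call (the query below is malformed on purpose).

```python
from sqlglot import parse_one
from sqlglot.errors import ErrorLevel, ParseError

try:
    # Malformed on purpose: the FROM clause is missing its table.
    parse_one("SELECT * FROM", read="snowflake", error_level=ErrorLevel.RAISE)
except ParseError as e:
    # With ErrorLevel.RAISE, up to max_errors messages are collected
    # before the ParseError is raised.
    print(e.errors)
```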
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
1119 class Tokenizer(tokens.Tokenizer): 1120 STRING_ESCAPES = ["\\", "'"] 1121 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1122 RAW_STRINGS = ["$$"] 1123 COMMENTS = ["--", "//", ("/*", "*/")] 1124 NESTED_COMMENTS = False 1125 1126 KEYWORDS = { 1127 **tokens.Tokenizer.KEYWORDS, 1128 "FILE://": TokenType.URI_START, 1129 "BYTEINT": TokenType.INT, 1130 "EXCLUDE": TokenType.EXCEPT, 1131 "FILE FORMAT": TokenType.FILE_FORMAT, 1132 "GET": TokenType.GET, 1133 "ILIKE ANY": TokenType.ILIKE_ANY, 1134 "LIKE ANY": TokenType.LIKE_ANY, 1135 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1136 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1137 "MINUS": TokenType.EXCEPT, 1138 "NCHAR VARYING": TokenType.VARCHAR, 1139 "PUT": TokenType.PUT, 1140 "REMOVE": TokenType.COMMAND, 1141 "RM": TokenType.COMMAND, 1142 "SAMPLE": TokenType.TABLE_SAMPLE, 1143 "SQL_DOUBLE": TokenType.DOUBLE, 1144 "SQL_VARCHAR": TokenType.VARCHAR, 1145 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1146 "TAG": TokenType.TAG, 1147 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1148 "TOP": TokenType.TOP, 1149 "WAREHOUSE": TokenType.WAREHOUSE, 1150 "STAGE": TokenType.STAGE, 1151 "STREAMLIT": TokenType.STREAMLIT, 1152 } 1153 KEYWORDS.pop("/*+") 1154 1155 SINGLE_TOKENS = { 1156 **tokens.Tokenizer.SINGLE_TOKENS, 1157 "$": TokenType.PARAMETER, 1158 } 1159 1160 VAR_SINGLE_TOKENS = {"$"} 1161 1162 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
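A small sketch of the keyword table above (token names indicative): `//` opens a line comment and `MINUS` tokenizes as EXCEPT in this dialect, so the comment is attached to a token rather than emitted, and both set-difference spellings yield the same stream.

```python
import sqlglot

tokens = sqlglot.tokenize(
    "SELECT 1 MINUS SELECT 2  // Snowflake line comment", read="snowflake"
)
print([t.token_type.name for t in tokens])
# e.g. ['SELECT', 'NUMBER', 'EXCEPT', 'SELECT', 'NUMBER']
```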
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1164 class Generator(generator.Generator): 1165 PARAMETER_TOKEN = "$" 1166 MATCHED_BY_SOURCE = False 1167 SINGLE_STRING_INTERVAL = True 1168 JOIN_HINTS = False 1169 TABLE_HINTS = False 1170 QUERY_HINTS = False 1171 AGGREGATE_FILTER_SUPPORTED = False 1172 SUPPORTS_TABLE_COPY = False 1173 COLLATE_IS_FUNC = True 1174 LIMIT_ONLY_LITERALS = True 1175 JSON_KEY_VALUE_PAIR_SEP = "," 1176 INSERT_OVERWRITE = " OVERWRITE INTO" 1177 STRUCT_DELIMITER = ("(", ")") 1178 COPY_PARAMS_ARE_WRAPPED = False 1179 COPY_PARAMS_EQ_REQUIRED = True 1180 STAR_EXCEPT = "EXCLUDE" 1181 SUPPORTS_EXPLODING_PROJECTIONS = False 1182 ARRAY_CONCAT_IS_VAR_LEN = False 1183 SUPPORTS_CONVERT_TIMEZONE = True 1184 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1185 SUPPORTS_MEDIAN = True 1186 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1187 SUPPORTS_DECODE_CASE = True 1188 1189 TRANSFORMS = { 1190 **generator.Generator.TRANSFORMS, 1191 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1192 exp.ArgMax: rename_func("MAX_BY"), 1193 exp.ArgMin: rename_func("MIN_BY"), 1194 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1195 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1196 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1197 exp.AtTimeZone: lambda self, e: self.func( 1198 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1199 ), 1200 exp.BitwiseOr: rename_func("BITOR"), 1201 exp.BitwiseXor: rename_func("BITXOR"), 1202 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1203 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1204 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1205 exp.DateAdd: date_delta_sql("DATEADD"), 1206 exp.DateDiff: date_delta_sql("DATEDIFF"), 1207 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1208 exp.DatetimeDiff: timestampdiff_sql, 1209 exp.DateStrToDate: datestrtodate_sql, 1210 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1211 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1212 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1213 exp.DayOfYear: rename_func("DAYOFYEAR"), 1214 exp.Explode: rename_func("FLATTEN"), 1215 exp.Extract: lambda self, e: self.func( 1216 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1217 ), 1218 exp.FileFormatProperty: lambda self, 1219 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1220 exp.FromTimeZone: lambda self, e: self.func( 1221 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1222 ), 1223 exp.GenerateSeries: lambda self, e: self.func( 1224 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1225 ), 1226 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1227 exp.If: if_sql(name="IFF", false_value="NULL"), 1228 exp.JSONExtractArray: _json_extract_value_array_sql, 1229 exp.JSONExtractScalar: lambda self, e: self.func( 1230 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1231 ), 1232 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1233 exp.JSONPathRoot: lambda *_: "", 1234 exp.JSONValueArray: _json_extract_value_array_sql, 1235 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1236 rename_func("EDITDISTANCE") 1237 ), 1238 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1239 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1240 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1241 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1242 exp.MakeInterval: no_make_interval_sql, 1243 exp.Max: max_or_greatest, 1244 exp.Min: 
min_or_least, 1245 exp.ParseJSON: lambda self, e: self.func( 1246 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1247 ), 1248 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1249 exp.PercentileCont: transforms.preprocess( 1250 [transforms.add_within_group_for_percentiles] 1251 ), 1252 exp.PercentileDisc: transforms.preprocess( 1253 [transforms.add_within_group_for_percentiles] 1254 ), 1255 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1256 exp.RegexpExtract: _regexpextract_sql, 1257 exp.RegexpExtractAll: _regexpextract_sql, 1258 exp.RegexpILike: _regexpilike_sql, 1259 exp.Rand: rename_func("RANDOM"), 1260 exp.Select: transforms.preprocess( 1261 [ 1262 transforms.eliminate_window_clause, 1263 transforms.eliminate_distinct_on, 1264 transforms.explode_projection_to_unnest(), 1265 transforms.eliminate_semi_and_anti_joins, 1266 _transform_generate_date_array, 1267 _qualify_unnested_columns, 1268 _eliminate_dot_variant_lookup, 1269 ] 1270 ), 1271 exp.SHA: rename_func("SHA1"), 1272 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1273 exp.StartsWith: rename_func("STARTSWITH"), 1274 exp.EndsWith: rename_func("ENDSWITH"), 1275 exp.StrPosition: lambda self, e: strposition_sql( 1276 self, e, func_name="CHARINDEX", supports_position=True 1277 ), 1278 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1279 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1280 exp.Stuff: rename_func("INSERT"), 1281 exp.StPoint: rename_func("ST_MAKEPOINT"), 1282 exp.TimeAdd: date_delta_sql("TIMEADD"), 1283 exp.Timestamp: no_timestamp_sql, 1284 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1285 exp.TimestampDiff: lambda self, e: self.func( 1286 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1287 ), 1288 exp.TimestampTrunc: timestamptrunc_sql(), 1289 exp.TimeStrToTime: timestrtotime_sql, 1290 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1291 exp.ToArray: rename_func("TO_ARRAY"), 1292 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1293 exp.ToDouble: rename_func("TO_DOUBLE"), 1294 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1295 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1296 exp.TsOrDsToDate: lambda self, e: self.func( 1297 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1298 ), 1299 exp.TsOrDsToTime: lambda self, e: self.func( 1300 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1301 ), 1302 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1303 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1304 exp.Uuid: rename_func("UUID_STRING"), 1305 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1306 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1307 exp.Xor: rename_func("BOOLXOR"), 1308 } 1309 1310 SUPPORTED_JSON_PATH_PARTS = { 1311 exp.JSONPathKey, 1312 exp.JSONPathRoot, 1313 exp.JSONPathSubscript, 1314 } 1315 1316 TYPE_MAPPING = { 1317 **generator.Generator.TYPE_MAPPING, 1318 exp.DataType.Type.NESTED: "OBJECT", 1319 exp.DataType.Type.STRUCT: "OBJECT", 1320 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1321 } 1322 1323 TOKEN_MAPPING = { 1324 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1325 } 1326 1327 PROPERTIES_LOCATION = { 1328 **generator.Generator.PROPERTIES_LOCATION, 1329 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1330 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1331 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1332 exp.SetProperty: 
exp.Properties.Location.UNSUPPORTED, 1333 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1334 } 1335 1336 UNSUPPORTED_VALUES_EXPRESSIONS = { 1337 exp.Map, 1338 exp.StarMap, 1339 exp.Struct, 1340 exp.VarMap, 1341 } 1342 1343 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1344 1345 def with_properties(self, properties: exp.Properties) -> str: 1346 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1347 1348 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1349 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1350 values_as_table = False 1351 1352 return super().values_sql(expression, values_as_table=values_as_table) 1353 1354 def datatype_sql(self, expression: exp.DataType) -> str: 1355 expressions = expression.expressions 1356 if ( 1357 expressions 1358 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1359 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1360 ): 1361 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1362 return "OBJECT" 1363 1364 return super().datatype_sql(expression) 1365 1366 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1367 return self.func( 1368 "TO_NUMBER", 1369 expression.this, 1370 expression.args.get("format"), 1371 expression.args.get("precision"), 1372 expression.args.get("scale"), 1373 ) 1374 1375 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1376 milli = expression.args.get("milli") 1377 if milli is not None: 1378 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1379 expression.set("nano", milli_to_nano) 1380 1381 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1382 1383 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1384 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1385 return self.func("TO_GEOGRAPHY", expression.this) 1386 if expression.is_type(exp.DataType.Type.GEOMETRY): 1387 return self.func("TO_GEOMETRY", expression.this) 1388 1389 return super().cast_sql(expression, safe_prefix=safe_prefix) 1390 1391 def trycast_sql(self, expression: exp.TryCast) -> str: 1392 value = expression.this 1393 1394 if value.type is None: 1395 from sqlglot.optimizer.annotate_types import annotate_types 1396 1397 value = annotate_types(value, dialect=self.dialect) 1398 1399 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1400 return super().trycast_sql(expression) 1401 1402 # TRY_CAST only works for string values in Snowflake 1403 return self.cast_sql(expression) 1404 1405 def log_sql(self, expression: exp.Log) -> str: 1406 if not expression.expression: 1407 return self.func("LN", expression.this) 1408 1409 return super().log_sql(expression) 1410 1411 def unnest_sql(self, expression: exp.Unnest) -> str: 1412 unnest_alias = expression.args.get("alias") 1413 offset = expression.args.get("offset") 1414 1415 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1416 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1417 1418 columns = [ 1419 exp.to_identifier("seq"), 1420 exp.to_identifier("key"), 1421 exp.to_identifier("path"), 1422 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1423 value, 1424 exp.to_identifier("this"), 1425 ] 1426 1427 if unnest_alias: 1428 unnest_alias.set("columns", columns) 1429 else: 1430 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1431 1432 table_input = self.sql(expression.expressions[0]) 
1433 if not table_input.startswith("INPUT =>"): 1434 table_input = f"INPUT => {table_input}" 1435 1436 expression_parent = expression.parent 1437 1438 explode = ( 1439 f"FLATTEN({table_input})" 1440 if isinstance(expression_parent, exp.Lateral) 1441 else f"TABLE(FLATTEN({table_input}))" 1442 ) 1443 alias = self.sql(unnest_alias) 1444 alias = f" AS {alias}" if alias else "" 1445 value = ( 1446 "" 1447 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1448 else f"{value} FROM " 1449 ) 1450 1451 return f"{value}{explode}{alias}" 1452 1453 def show_sql(self, expression: exp.Show) -> str: 1454 terse = "TERSE " if expression.args.get("terse") else "" 1455 history = " HISTORY" if expression.args.get("history") else "" 1456 like = self.sql(expression, "like") 1457 like = f" LIKE {like}" if like else "" 1458 1459 scope = self.sql(expression, "scope") 1460 scope = f" {scope}" if scope else "" 1461 1462 scope_kind = self.sql(expression, "scope_kind") 1463 if scope_kind: 1464 scope_kind = f" IN {scope_kind}" 1465 1466 starts_with = self.sql(expression, "starts_with") 1467 if starts_with: 1468 starts_with = f" STARTS WITH {starts_with}" 1469 1470 limit = self.sql(expression, "limit") 1471 1472 from_ = self.sql(expression, "from") 1473 if from_: 1474 from_ = f" FROM {from_}" 1475 1476 privileges = self.expressions(expression, key="privileges", flat=True) 1477 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1478 1479 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1480 1481 def describe_sql(self, expression: exp.Describe) -> str: 1482 # Default to table if kind is unknown 1483 kind_value = expression.args.get("kind") or "TABLE" 1484 kind = f" {kind_value}" if kind_value else "" 1485 this = f" {self.sql(expression, 'this')}" 1486 expressions = self.expressions(expression, flat=True) 1487 expressions = f" {expressions}" if expressions else "" 1488 return f"DESCRIBE{kind}{this}{expressions}" 1489 1490 def generatedasidentitycolumnconstraint_sql( 1491 self, expression: exp.GeneratedAsIdentityColumnConstraint 1492 ) -> str: 1493 start = expression.args.get("start") 1494 start = f" START {start}" if start else "" 1495 increment = expression.args.get("increment") 1496 increment = f" INCREMENT {increment}" if increment else "" 1497 1498 order = expression.args.get("order") 1499 if order is not None: 1500 order_clause = " ORDER" if order else " NOORDER" 1501 else: 1502 order_clause = "" 1503 1504 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1505 1506 def cluster_sql(self, expression: exp.Cluster) -> str: 1507 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1508 1509 def struct_sql(self, expression: exp.Struct) -> str: 1510 keys = [] 1511 values = [] 1512 1513 for i, e in enumerate(expression.expressions): 1514 if isinstance(e, exp.PropertyEQ): 1515 keys.append( 1516 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1517 ) 1518 values.append(e.expression) 1519 else: 1520 keys.append(exp.Literal.string(f"_{i}")) 1521 values.append(e) 1522 1523 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1524 1525 @unsupported_args("weight", "accuracy") 1526 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1527 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1528 1529 def alterset_sql(self, expression: exp.AlterSet) -> str: 1530 exprs = self.expressions(expression, flat=True) 1531 exprs = f" 
{exprs}" if exprs else "" 1532 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1533 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1534 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1535 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1536 tag = self.expressions(expression, key="tag", flat=True) 1537 tag = f" TAG {tag}" if tag else "" 1538 1539 return f"SET{exprs}{file_format}{copy_options}{tag}" 1540 1541 def strtotime_sql(self, expression: exp.StrToTime): 1542 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1543 return self.func( 1544 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1545 ) 1546 1547 def timestampsub_sql(self, expression: exp.TimestampSub): 1548 return self.sql( 1549 exp.TimestampAdd( 1550 this=expression.this, 1551 expression=expression.expression * -1, 1552 unit=expression.unit, 1553 ) 1554 ) 1555 1556 def jsonextract_sql(self, expression: exp.JSONExtract): 1557 this = expression.this 1558 1559 # JSON strings are valid coming from other dialects such as BQ 1560 return self.func( 1561 "GET_PATH", 1562 exp.ParseJSON(this=this) if this.is_string else this, 1563 expression.expression, 1564 ) 1565 1566 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1567 this = expression.this 1568 if this.is_string: 1569 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1570 1571 return self.func("TO_CHAR", this, self.format_time(expression)) 1572 1573 def datesub_sql(self, expression: exp.DateSub) -> str: 1574 value = expression.expression 1575 if value: 1576 value.replace(value * (-1)) 1577 else: 1578 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1579 1580 return date_delta_sql("DATEADD")(self, expression) 1581 1582 def select_sql(self, expression: exp.Select) -> str: 1583 limit = expression.args.get("limit") 1584 offset = expression.args.get("offset") 1585 if offset and not limit: 1586 expression.limit(exp.Null(), copy=False) 1587 return super().select_sql(expression) 1588 1589 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1590 is_materialized = expression.find(exp.MaterializedProperty) 1591 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1592 1593 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1594 # For materialized views, COPY GRANTS is located *before* the columns list 1595 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1596 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1597 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1598 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1599 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1600 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1601 1602 this_name = self.sql(expression.this, "this") 1603 copy_grants = self.sql(copy_grants_property) 1604 this_schema = self.schema_columns_sql(expression.this) 1605 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1606 1607 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1608 1609 return super().createable_sql(expression, locations) 1610 1611 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1612 this = expression.this 1613 1614 # 
If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1615 # and add it later as part of the WITHIN GROUP clause 1616 order = this if isinstance(this, exp.Order) else None 1617 if order: 1618 expression.set("this", order.this.pop()) 1619 1620 expr_sql = super().arrayagg_sql(expression) 1621 1622 if order: 1623 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1624 1625 return expr_sql 1626 1627 def array_sql(self, expression: exp.Array) -> str: 1628 expressions = expression.expressions 1629 1630 first_expr = seq_get(expressions, 0) 1631 if isinstance(first_expr, exp.Select): 1632 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1633 if first_expr.text("kind").upper() == "STRUCT": 1634 object_construct_args = [] 1635 for expr in first_expr.expressions: 1636 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1637 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1638 name = expr.this if isinstance(expr, exp.Alias) else expr 1639 1640 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1641 1642 array_agg = exp.ArrayAgg( 1643 this=_build_object_construct(args=object_construct_args) 1644 ) 1645 1646 first_expr.set("kind", None) 1647 first_expr.set("expressions", [array_agg]) 1648 1649 return self.sql(first_expr.subquery()) 1650 1651 return inline_array_sql(self, expression) 1652 1653 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1654 zone = self.sql(expression, "this") 1655 if not zone: 1656 return super().currentdate_sql(expression) 1657 1658 expr = exp.Cast( 1659 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1660 to=exp.DataType(this=exp.DataType.Type.DATE), 1661 ) 1662 return self.sql(expr)
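The net effect of the TRANSFORMS table and flags above is easiest to see through transpilation. A minimal sketch follows; the printed outputs are indicative, not authoritative.

```python
import sqlglot

# exp.If renders via IFF (see exp.If: if_sql(name="IFF", ...) above).
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', NULL) FROM t",
                        read="duckdb", write="snowflake")[0])
# e.g. SELECT IFF(x > 0, 'pos', NULL) FROM t

# STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT becomes EXCLUDE.
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t",
                        read="bigquery", write="snowflake")[0])
# e.g. SELECT * EXCLUDE (a) FROM t

# exp.Levenshtein renders as EDITDISTANCE (rename_func above).
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b) FROM t", write="snowflake")[0])
# e.g. SELECT EDITDISTANCE(a, b) FROM t
```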
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
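A short sketch of these generator options (assumptions: `pretty` and `identify` are forwarded by `sqlglot.transpile` to the Generator, per the argument forwarding used throughout sqlglot):

```python
import sqlglot

sql = "select a, b from t where a > 1"
# Pretty-printed output with every identifier quoted.
print(sqlglot.transpile(sql, write="snowflake", pretty=True, identify=True)[0])
```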
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql