sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)

                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder
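
# Illustrative sketch of what the builders above produce (not part of the
# dialect; assumes a recent sqlglot install). `TO_TIMESTAMP` is parsed into
# different nodes depending on its arguments:
#
#   import sqlglot
#
#   # String literal, no format -> a plain cast
#   sqlglot.parse_one("TO_TIMESTAMP('2020-01-01')", read="snowflake").assert_is(exp.Cast)
#
#   # Integer seconds -> UNIX epoch conversion
#   sqlglot.parse_one("TO_TIMESTAMP(1614866999)", read="snowflake").assert_is(exp.UnixToTime)
#
#   # Value plus format string -> exp.StrToTime, with the format normalized
#   # through the TIME_MAPPING defined further below
#   sqlglot.parse_one("TO_TIMESTAMP(col, 'YYYY-MM-DD')", read="snowflake").assert_is(exp.StrToTime)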
def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
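
# Illustrative sketch of the DIV0 / NULLIFZERO rewrites above (assumes a recent
# sqlglot; exact output may vary slightly by version):
#
#   import sqlglot
#
#   sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")[0]
#   # -> roughly: SELECT CASE WHEN b = 0 AND NOT a IS NULL THEN 0 ELSE a / b END
#
#   sqlglot.transpile("SELECT NULLIFZERO(x)", read="snowflake", write="snowflake")[0]
#   # -> SELECT IFF(x = 0, NULL, x)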
def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )
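
# Illustrative sketch of the GENERATE_DATE_ARRAY rewrite above (assumes a
# recent sqlglot; output paraphrased). Snowflake lacks GENERATE_DATE_ARRAY, so
# a BigQuery date range is emulated by generating an integer sequence with
# ARRAY_GENERATE_RANGE and projecting DATEADD(<unit>, value, <start>) over it:
#
#   import sqlglot
#
#   sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-01-05'))"
#   sqlglot.transpile(sql, read="bigquery", write="snowflake")[0]
#   # -> a FLATTEN over ARRAY_GENERATE_RANGE(0, DATEDIFF(DAY, start, end) + 1),
#   #    with the unnested value replaced by DATEADD(DAY, value, CAST(... AS DATE))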
def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH),
            # the transformed Snowflake query is the following (it'll be unnested properly on the
            # next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
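
# Illustrative sketch of `_regexpextract_sql` (assumes a recent sqlglot): when
# a non-default capture group is requested, the optional position, occurrence
# and parameters arguments are materialized so the group lands in the right
# argument slot:
#
#   import sqlglot
#
#   sqlglot.transpile("SELECT REGEXP_EXTRACT(col, 'a(b)', 1)", read="duckdb", write="snowflake")[0]
#   # -> roughly: SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)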
def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression
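
# Illustrative sketch of `_qualify_unnested_columns` (assumes a recent
# sqlglot): BigQuery permits bare references to UNNEST output, so when the
# unnested value is statically known (an array of structs) the bare columns
# are re-qualified with the unnest alias before generation:
#
#   import sqlglot
#
#   ast = sqlglot.parse_one("SELECT x FROM UNNEST([STRUCT(1 AS x)]) AS t", read="bigquery")
#   # During Snowflake generation, the bare `x` is qualified as `t.x`, which
#   # `_eliminate_dot_variant_lookup` below then rewrites into t['x'].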
def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and (unnest_alias.args.get("column_only") or not unnest_alias.this)
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }
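
    # Illustrative sketch of TIME_MAPPING in action (assumes a recent sqlglot;
    # output paraphrased): Snowflake format tokens are normalized to
    # strftime-style tokens at parse time and mapped back out per target
    # dialect:
    #
    #   import sqlglot
    #
    #   sqlglot.transpile(
    #       "SELECT TO_CHAR(ts, 'YYYY-MM-DD HH24:MI:SS')", read="snowflake", write="duckdb"
    #   )[0]
    #   # -> roughly: SELECT STRFTIME(ts, '%Y-%m-%d %H:%M:%S')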
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
"RLIKE": exp.RegexpLike.from_arg_list, 615 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 616 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 617 "TIMEADD": _build_date_time_add(exp.TimeAdd), 618 "TIMEDIFF": _build_datediff, 619 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 620 "TIMESTAMPDIFF": _build_datediff, 621 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 622 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 623 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 624 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 625 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 626 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 627 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 628 "TRY_TO_TIMESTAMP": _build_datetime( 629 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 630 ), 631 "TO_CHAR": build_timetostr_or_tochar, 632 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 633 "TO_NUMBER": lambda args: exp.ToNumber( 634 this=seq_get(args, 0), 635 format=seq_get(args, 1), 636 precision=seq_get(args, 2), 637 scale=seq_get(args, 3), 638 ), 639 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 640 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 641 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 642 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 643 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 644 "TO_VARCHAR": exp.ToChar.from_arg_list, 645 "ZEROIFNULL": _build_if_from_zeroifnull, 646 } 647 648 FUNCTION_PARSERS = { 649 **parser.Parser.FUNCTION_PARSERS, 650 "DATE_PART": lambda self: self._parse_date_part(), 651 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 652 "LISTAGG": lambda self: self._parse_string_agg(), 653 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 654 } 655 FUNCTION_PARSERS.pop("TRIM") 656 657 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 658 659 ALTER_PARSERS = { 660 **parser.Parser.ALTER_PARSERS, 661 "UNSET": lambda self: self.expression( 662 exp.Set, 663 tag=self._match_text_seq("TAG"), 664 expressions=self._parse_csv(self._parse_id_var), 665 unset=True, 666 ), 667 } 668 669 STATEMENT_PARSERS = { 670 **parser.Parser.STATEMENT_PARSERS, 671 TokenType.GET: lambda self: self._parse_get(), 672 TokenType.PUT: lambda self: self._parse_put(), 673 TokenType.SHOW: lambda self: self._parse_show(), 674 } 675 676 PROPERTY_PARSERS = { 677 **parser.Parser.PROPERTY_PARSERS, 678 "CREDENTIALS": lambda self: self._parse_credentials_property(), 679 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 680 "LOCATION": lambda self: self._parse_location_property(), 681 "TAG": lambda self: self._parse_tag(), 682 "USING": lambda self: self._match_text_seq("TEMPLATE") 683 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 684 } 685 686 TYPE_CONVERTERS = { 687 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 688 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 689 } 690 691 SHOW_PARSERS = { 692 "DATABASES": _show_parser("DATABASES"), 693 "TERSE DATABASES": _show_parser("DATABASES"), 694 "SCHEMAS": _show_parser("SCHEMAS"), 695 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 696 "OBJECTS": _show_parser("OBJECTS"), 697 "TERSE 
OBJECTS": _show_parser("OBJECTS"), 698 "TABLES": _show_parser("TABLES"), 699 "TERSE TABLES": _show_parser("TABLES"), 700 "VIEWS": _show_parser("VIEWS"), 701 "TERSE VIEWS": _show_parser("VIEWS"), 702 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 703 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 704 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 705 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 706 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 707 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 708 "SEQUENCES": _show_parser("SEQUENCES"), 709 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 710 "STAGES": _show_parser("STAGES"), 711 "COLUMNS": _show_parser("COLUMNS"), 712 "USERS": _show_parser("USERS"), 713 "TERSE USERS": _show_parser("USERS"), 714 "FILE FORMATS": _show_parser("FILE FORMATS"), 715 "FUNCTIONS": _show_parser("FUNCTIONS"), 716 "PROCEDURES": _show_parser("PROCEDURES"), 717 "WAREHOUSES": _show_parser("WAREHOUSES"), 718 } 719 720 CONSTRAINT_PARSERS = { 721 **parser.Parser.CONSTRAINT_PARSERS, 722 "WITH": lambda self: self._parse_with_constraint(), 723 "MASKING": lambda self: self._parse_with_constraint(), 724 "PROJECTION": lambda self: self._parse_with_constraint(), 725 "TAG": lambda self: self._parse_with_constraint(), 726 } 727 728 STAGED_FILE_SINGLE_TOKENS = { 729 TokenType.DOT, 730 TokenType.MOD, 731 TokenType.SLASH, 732 } 733 734 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 735 736 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 737 738 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 739 740 LAMBDAS = { 741 **parser.Parser.LAMBDAS, 742 TokenType.ARROW: lambda self, expressions: self.expression( 743 exp.Lambda, 744 this=self._replace_lambda( 745 self._parse_assignment(), 746 expressions, 747 ), 748 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 749 ), 750 } 751 752 def _parse_use(self) -> exp.Use: 753 if self._match_text_seq("SECONDARY", "ROLES"): 754 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 755 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 756 return self.expression( 757 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 758 ) 759 760 return super()._parse_use() 761 762 def _negate_range( 763 self, this: t.Optional[exp.Expression] = None 764 ) -> t.Optional[exp.Expression]: 765 if not this: 766 return this 767 768 query = this.args.get("query") 769 if isinstance(this, exp.In) and isinstance(query, exp.Query): 770 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 771 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 772 # which can produce different results (most likely a SnowFlake bug). 
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
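
        # Illustrative sketch of the EPOCH handling above (assumes a recent
        # sqlglot; output paraphrased):
        #
        #   import sqlglot
        #
        #   sqlglot.transpile(
        #       "SELECT DATE_PART(EPOCH_MILLISECOND, col)", read="snowflake", write="snowflake"
        #   )[0]
        #   # -> SELECT EXTRACT(epoch_second FROM CAST(col AS TIMESTAMP)) * 1000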
        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
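
        # Illustrative sketch of the staged-file parsing above (assumes a
        # recent sqlglot): querying a stage keeps the @-path and its options on
        # the Table node, so the query round-trips:
        #
        #   import sqlglot
        #
        #   sqlglot.parse_one(
        #       "SELECT * FROM @mystage (FILE_FORMAT => 'myformat', PATTERN => '.*[.]csv')",
        #       read="snowflake",
        #   )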
        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as a command if the file path is unquoted
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # Inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # Out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()
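
        # Illustrative sketch of `_parse_show_snowflake` (assumes a recent
        # sqlglot; attribute access paraphrased):
        #
        #   import sqlglot
        #
        #   show = sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db1.schema1", read="snowflake")
        #   show.args["terse"], show.name, show.args["scope_kind"]
        #   # -> roughly: (True, 'TABLES', 'SCHEMA')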
        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "EXCLUDE": TokenType.EXCEPT,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STAGE": TokenType.STAGE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False
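
        # Illustrative sketch of one of the flags above (assumes a recent
        # sqlglot): STAR_EXCEPT = "EXCLUDE" renders BigQuery's SELECT * EXCEPT
        # with Snowflake's EXCLUDE keyword:
        #
        #   import sqlglot
        #
        #   sqlglot.transpile("SELECT * EXCEPT (x) FROM t", read="bigquery", write="snowflake")[0]
        #   # -> SELECT * EXCLUDE (x) FROM t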
self.func("ARRAY_CONTAINS", e.expression, e.this), 1194 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1195 exp.AtTimeZone: lambda self, e: self.func( 1196 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1197 ), 1198 exp.BitwiseOr: rename_func("BITOR"), 1199 exp.BitwiseXor: rename_func("BITXOR"), 1200 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1201 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1202 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1203 exp.DateAdd: date_delta_sql("DATEADD"), 1204 exp.DateDiff: date_delta_sql("DATEDIFF"), 1205 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1206 exp.DatetimeDiff: timestampdiff_sql, 1207 exp.DateStrToDate: datestrtodate_sql, 1208 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1209 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1210 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1211 exp.DayOfYear: rename_func("DAYOFYEAR"), 1212 exp.Explode: rename_func("FLATTEN"), 1213 exp.Extract: lambda self, e: self.func( 1214 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1215 ), 1216 exp.FileFormatProperty: lambda self, 1217 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1218 exp.FromTimeZone: lambda self, e: self.func( 1219 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1220 ), 1221 exp.GenerateSeries: lambda self, e: self.func( 1222 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1223 ), 1224 exp.GetExtract: rename_func("GET"), 1225 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1226 exp.If: if_sql(name="IFF", false_value="NULL"), 1227 exp.JSONExtractArray: _json_extract_value_array_sql, 1228 exp.JSONExtractScalar: lambda self, e: self.func( 1229 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1230 ), 1231 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1232 exp.JSONPathRoot: lambda *_: "", 1233 exp.JSONValueArray: _json_extract_value_array_sql, 1234 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1235 rename_func("EDITDISTANCE") 1236 ), 1237 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1238 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1239 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1240 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1241 exp.MakeInterval: no_make_interval_sql, 1242 exp.Max: max_or_greatest, 1243 exp.Min: min_or_least, 1244 exp.ParseJSON: lambda self, e: self.func( 1245 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1246 ), 1247 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1248 exp.PercentileCont: transforms.preprocess( 1249 [transforms.add_within_group_for_percentiles] 1250 ), 1251 exp.PercentileDisc: transforms.preprocess( 1252 [transforms.add_within_group_for_percentiles] 1253 ), 1254 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1255 exp.RegexpExtract: _regexpextract_sql, 1256 exp.RegexpExtractAll: _regexpextract_sql, 1257 exp.RegexpILike: _regexpilike_sql, 1258 exp.Rand: rename_func("RANDOM"), 1259 exp.Select: transforms.preprocess( 1260 [ 1261 transforms.eliminate_window_clause, 1262 transforms.eliminate_distinct_on, 1263 transforms.explode_projection_to_unnest(), 1264 transforms.eliminate_semi_and_anti_joins, 1265 _transform_generate_date_array, 1266 _qualify_unnested_columns, 1267 _eliminate_dot_variant_lookup, 1268 ] 1269 ), 1270 exp.SHA: rename_func("SHA1"), 1271 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1272 
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"
            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string. If TRY_CAST is being
            # roundtripped (since Snowflake is the only dialect that sets "requires_string"),
            # or if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"
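
        # Illustrative sketch of `unnest_sql` (assumes a recent sqlglot; output
        # paraphrased): an UNNEST coming from another dialect is rendered as a
        # FLATTEN table function with the canonical column list:
        #
        #   import sqlglot
        #
        #   sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="bigquery", write="snowflake")[0]
        #   # -> a TABLE(FLATTEN(INPUT => [1, 2])) source aliased with columns
        #   #    (seq, key, path, index, value, this)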
"" 1462 1463 scope_kind = self.sql(expression, "scope_kind") 1464 if scope_kind: 1465 scope_kind = f" IN {scope_kind}" 1466 1467 starts_with = self.sql(expression, "starts_with") 1468 if starts_with: 1469 starts_with = f" STARTS WITH {starts_with}" 1470 1471 limit = self.sql(expression, "limit") 1472 1473 from_ = self.sql(expression, "from") 1474 if from_: 1475 from_ = f" FROM {from_}" 1476 1477 privileges = self.expressions(expression, key="privileges", flat=True) 1478 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1479 1480 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1481 1482 def describe_sql(self, expression: exp.Describe) -> str: 1483 # Default to table if kind is unknown 1484 kind_value = expression.args.get("kind") or "TABLE" 1485 kind = f" {kind_value}" if kind_value else "" 1486 this = f" {self.sql(expression, 'this')}" 1487 expressions = self.expressions(expression, flat=True) 1488 expressions = f" {expressions}" if expressions else "" 1489 return f"DESCRIBE{kind}{this}{expressions}" 1490 1491 def generatedasidentitycolumnconstraint_sql( 1492 self, expression: exp.GeneratedAsIdentityColumnConstraint 1493 ) -> str: 1494 start = expression.args.get("start") 1495 start = f" START {start}" if start else "" 1496 increment = expression.args.get("increment") 1497 increment = f" INCREMENT {increment}" if increment else "" 1498 1499 order = expression.args.get("order") 1500 if order is not None: 1501 order_clause = " ORDER" if order else " NOORDER" 1502 else: 1503 order_clause = "" 1504 1505 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1506 1507 def cluster_sql(self, expression: exp.Cluster) -> str: 1508 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1509 1510 def struct_sql(self, expression: exp.Struct) -> str: 1511 keys = [] 1512 values = [] 1513 1514 for i, e in enumerate(expression.expressions): 1515 if isinstance(e, exp.PropertyEQ): 1516 keys.append( 1517 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1518 ) 1519 values.append(e.expression) 1520 else: 1521 keys.append(exp.Literal.string(f"_{i}")) 1522 values.append(e) 1523 1524 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1525 1526 @unsupported_args("weight", "accuracy") 1527 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1528 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1529 1530 def alterset_sql(self, expression: exp.AlterSet) -> str: 1531 exprs = self.expressions(expression, flat=True) 1532 exprs = f" {exprs}" if exprs else "" 1533 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1534 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1535 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1536 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1537 tag = self.expressions(expression, key="tag", flat=True) 1538 tag = f" TAG {tag}" if tag else "" 1539 1540 return f"SET{exprs}{file_format}{copy_options}{tag}" 1541 1542 def strtotime_sql(self, expression: exp.StrToTime): 1543 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1544 return self.func( 1545 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1546 ) 1547 1548 def timestampsub_sql(self, expression: exp.TimestampSub): 1549 return self.sql( 1550 exp.TimestampAdd( 1551 this=expression.this, 
        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ, so
            # for these cases we PARSE_JSON preemptively
            if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
                "requires_json"
            ):
                this = exp.ParseJSON(this=this)

            return self.func(
                "GET_PATH",
                this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if this.is_string:
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            is_materialized = expression.find(exp.MaterializedProperty)
            copy_grants_property = expression.find(exp.CopyGrantsProperty)

            if expression.kind == "VIEW" and is_materialized and copy_grants_property:
                # For materialized views, COPY GRANTS is located *before* the columns list,
                # in contrast to normal views where COPY GRANTS is located *after* it. We
                # default CopyGrantsProperty to POST_SCHEMA, which means we need to output
                # it POST_NAME if a materialized view is detected.
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
                post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
                post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

                this_name = self.sql(expression.this, "this")
                copy_grants = self.sql(copy_grants_property)
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

            return super().createable_sql(expression, locations)

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            this = expression.this

            # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
            # and add it later as part of the WITHIN GROUP clause
            order = this if isinstance(this, exp.Order) else None
            if order:
                expression.set("this", order.this.pop())

            expr_sql = super().arrayagg_sql(expression)

            if order:
                expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

            return expr_sql
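
        # Illustrative sketch of `arrayagg_sql` (assumes a recent sqlglot): an
        # ORDER BY inside ARRAY_AGG moves into a WITHIN GROUP clause:
        #
        #   import sqlglot
        #
        #   sqlglot.transpile("SELECT ARRAY_AGG(x ORDER BY y)", read="duckdb", write="snowflake")[0]
        #   # -> SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY y)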
first_expr.text("kind").upper() == "STRUCT": 1641 object_construct_args = [] 1642 for expr in first_expr.expressions: 1643 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1644 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1645 name = expr.this if isinstance(expr, exp.Alias) else expr 1646 1647 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1648 1649 array_agg = exp.ArrayAgg( 1650 this=_build_object_construct(args=object_construct_args) 1651 ) 1652 1653 first_expr.set("kind", None) 1654 first_expr.set("expressions", [array_agg]) 1655 1656 return self.sql(first_expr.subquery()) 1657 1658 return inline_array_sql(self, expression) 1659 1660 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1661 zone = self.sql(expression, "this") 1662 if not zone: 1663 return super().currentdate_sql(expression) 1664 1665 expr = exp.Cast( 1666 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1667 to=exp.DataType(this=exp.DataType.Type.DATE), 1668 ) 1669 return self.sql(expr) 1670 1671 def dot_sql(self, expression: exp.Dot) -> str: 1672 this = expression.this 1673 1674 if not this.type: 1675 from sqlglot.optimizer.annotate_types import annotate_types 1676 1677 this = annotate_types(this, dialect=self.dialect) 1678 1679 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1680 # Generate colon notation for the top level STRUCT 1681 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1682 1683 return super().dot_sql(expression)
486class Snowflake(Dialect): 487 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 488 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 489 NULL_ORDERING = "nulls_are_large" 490 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 491 SUPPORTS_USER_DEFINED_TYPES = False 492 SUPPORTS_SEMI_ANTI_JOIN = False 493 PREFER_CTE_ALIAS_COLUMN = True 494 TABLESAMPLE_SIZE_IS_PERCENT = True 495 COPY_PARAMS_ARE_CSV = False 496 ARRAY_AGG_INCLUDES_NULLS = None 497 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 498 TRY_CAST_REQUIRES_STRING = True 499 500 TIME_MAPPING = { 501 "YYYY": "%Y", 502 "yyyy": "%Y", 503 "YY": "%y", 504 "yy": "%y", 505 "MMMM": "%B", 506 "mmmm": "%B", 507 "MON": "%b", 508 "mon": "%b", 509 "MM": "%m", 510 "mm": "%m", 511 "DD": "%d", 512 "dd": "%-d", 513 "DY": "%a", 514 "dy": "%w", 515 "HH24": "%H", 516 "hh24": "%H", 517 "HH12": "%I", 518 "hh12": "%I", 519 "MI": "%M", 520 "mi": "%M", 521 "SS": "%S", 522 "ss": "%S", 523 "FF6": "%f", 524 "ff6": "%f", 525 } 526 527 DATE_PART_MAPPING = { 528 **Dialect.DATE_PART_MAPPING, 529 "ISOWEEK": "WEEKISO", 530 } 531 532 def quote_identifier(self, expression: E, identify: bool = True) -> E: 533 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 534 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 535 if ( 536 isinstance(expression, exp.Identifier) 537 and isinstance(expression.parent, exp.Table) 538 and expression.name.lower() == "dual" 539 ): 540 return expression # type: ignore 541 542 return super().quote_identifier(expression, identify=identify) 543 544 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 545 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 546 SINGLE_TOKENS.pop("$") 547 548 class Parser(parser.Parser): 549 IDENTIFY_PIVOT_STRINGS = True 550 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 551 COLON_IS_VARIANT_EXTRACT = True 552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 553 554 ID_VAR_TOKENS = { 555 *parser.Parser.ID_VAR_TOKENS, 556 TokenType.MATCH_CONDITION, 557 } 558 559 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 560 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 561 562 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 563 564 FUNCTIONS = { 565 **parser.Parser.FUNCTIONS, 566 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 567 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 568 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 569 this=seq_get(args, 1), expression=seq_get(args, 0) 570 ), 571 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 572 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 573 start=seq_get(args, 0), 574 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 575 step=seq_get(args, 2), 576 ), 577 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 578 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 579 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 580 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 581 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 582 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 583 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 584 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 585 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 586 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 587 "DATE_TRUNC": _date_trunc_to_time, 588 "DATEADD": _build_date_time_add(exp.DateAdd), 589 
"DATEDIFF": _build_datediff, 590 "DIV0": _build_if_from_div0, 591 "EDITDISTANCE": lambda args: exp.Levenshtein( 592 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 593 ), 594 "FLATTEN": exp.Explode.from_arg_list, 595 "GET": exp.GetExtract.from_arg_list, 596 "GET_PATH": lambda args, dialect: exp.JSONExtract( 597 this=seq_get(args, 0), 598 expression=dialect.to_json_path(seq_get(args, 1)), 599 requires_json=True, 600 ), 601 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 602 "IFF": exp.If.from_arg_list, 603 "LAST_DAY": lambda args: exp.LastDay( 604 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 605 ), 606 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 607 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 608 "NULLIFZERO": _build_if_from_nullifzero, 609 "OBJECT_CONSTRUCT": _build_object_construct, 610 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 611 "REGEXP_REPLACE": _build_regexp_replace, 612 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 613 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 614 "REPLACE": build_replace_with_optional_replacement, 615 "RLIKE": exp.RegexpLike.from_arg_list, 616 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 617 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 618 "TIMEADD": _build_date_time_add(exp.TimeAdd), 619 "TIMEDIFF": _build_datediff, 620 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 621 "TIMESTAMPDIFF": _build_datediff, 622 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 623 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 624 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 625 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 626 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 627 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 628 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 629 "TRY_TO_TIMESTAMP": _build_datetime( 630 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 631 ), 632 "TO_CHAR": build_timetostr_or_tochar, 633 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 634 "TO_NUMBER": lambda args: exp.ToNumber( 635 this=seq_get(args, 0), 636 format=seq_get(args, 1), 637 precision=seq_get(args, 2), 638 scale=seq_get(args, 3), 639 ), 640 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 641 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 642 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 643 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 644 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 645 "TO_VARCHAR": exp.ToChar.from_arg_list, 646 "ZEROIFNULL": _build_if_from_zeroifnull, 647 } 648 649 FUNCTION_PARSERS = { 650 **parser.Parser.FUNCTION_PARSERS, 651 "DATE_PART": lambda self: self._parse_date_part(), 652 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 653 "LISTAGG": lambda self: self._parse_string_agg(), 654 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 655 } 656 FUNCTION_PARSERS.pop("TRIM") 657 658 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 659 660 ALTER_PARSERS = { 661 **parser.Parser.ALTER_PARSERS, 662 "UNSET": lambda self: self.expression( 663 exp.Set, 664 tag=self._match_text_seq("TAG"), 665 
expressions=self._parse_csv(self._parse_id_var), 666 unset=True, 667 ), 668 } 669 670 STATEMENT_PARSERS = { 671 **parser.Parser.STATEMENT_PARSERS, 672 TokenType.GET: lambda self: self._parse_get(), 673 TokenType.PUT: lambda self: self._parse_put(), 674 TokenType.SHOW: lambda self: self._parse_show(), 675 } 676 677 PROPERTY_PARSERS = { 678 **parser.Parser.PROPERTY_PARSERS, 679 "CREDENTIALS": lambda self: self._parse_credentials_property(), 680 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 681 "LOCATION": lambda self: self._parse_location_property(), 682 "TAG": lambda self: self._parse_tag(), 683 "USING": lambda self: self._match_text_seq("TEMPLATE") 684 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 685 } 686 687 TYPE_CONVERTERS = { 688 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 689 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 690 } 691 692 SHOW_PARSERS = { 693 "DATABASES": _show_parser("DATABASES"), 694 "TERSE DATABASES": _show_parser("DATABASES"), 695 "SCHEMAS": _show_parser("SCHEMAS"), 696 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 697 "OBJECTS": _show_parser("OBJECTS"), 698 "TERSE OBJECTS": _show_parser("OBJECTS"), 699 "TABLES": _show_parser("TABLES"), 700 "TERSE TABLES": _show_parser("TABLES"), 701 "VIEWS": _show_parser("VIEWS"), 702 "TERSE VIEWS": _show_parser("VIEWS"), 703 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 704 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 705 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 706 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 707 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 708 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 709 "SEQUENCES": _show_parser("SEQUENCES"), 710 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 711 "STAGES": _show_parser("STAGES"), 712 "COLUMNS": _show_parser("COLUMNS"), 713 "USERS": _show_parser("USERS"), 714 "TERSE USERS": _show_parser("USERS"), 715 "FILE FORMATS": _show_parser("FILE FORMATS"), 716 "FUNCTIONS": _show_parser("FUNCTIONS"), 717 "PROCEDURES": _show_parser("PROCEDURES"), 718 "WAREHOUSES": _show_parser("WAREHOUSES"), 719 } 720 721 CONSTRAINT_PARSERS = { 722 **parser.Parser.CONSTRAINT_PARSERS, 723 "WITH": lambda self: self._parse_with_constraint(), 724 "MASKING": lambda self: self._parse_with_constraint(), 725 "PROJECTION": lambda self: self._parse_with_constraint(), 726 "TAG": lambda self: self._parse_with_constraint(), 727 } 728 729 STAGED_FILE_SINGLE_TOKENS = { 730 TokenType.DOT, 731 TokenType.MOD, 732 TokenType.SLASH, 733 } 734 735 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 736 737 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 738 739 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 740 741 LAMBDAS = { 742 **parser.Parser.LAMBDAS, 743 TokenType.ARROW: lambda self, expressions: self.expression( 744 exp.Lambda, 745 this=self._replace_lambda( 746 self._parse_assignment(), 747 expressions, 748 ), 749 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 750 ), 751 } 752 753 def _parse_use(self) -> exp.Use: 754 if self._match_text_seq("SECONDARY", "ROLES"): 755 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 756 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 757 return self.expression( 758 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 759 ) 760 761 return super()._parse_use() 762 763 
def _negate_range( 764 self, this: t.Optional[exp.Expression] = None 765 ) -> t.Optional[exp.Expression]: 766 if not this: 767 return this 768 769 query = this.args.get("query") 770 if isinstance(this, exp.In) and isinstance(query, exp.Query): 771 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 772 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 773 # which can produce different results (most likely a SnowFlake bug). 774 # 775 # https://docs.snowflake.com/en/sql-reference/functions/in 776 # Context: https://github.com/tobymao/sqlglot/issues/3890 777 return self.expression( 778 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 779 ) 780 781 return self.expression(exp.Not, this=this) 782 783 def _parse_tag(self) -> exp.Tags: 784 return self.expression( 785 exp.Tags, 786 expressions=self._parse_wrapped_csv(self._parse_property), 787 ) 788 789 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 790 if self._prev.token_type != TokenType.WITH: 791 self._retreat(self._index - 1) 792 793 if self._match_text_seq("MASKING", "POLICY"): 794 policy = self._parse_column() 795 return self.expression( 796 exp.MaskingPolicyColumnConstraint, 797 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 798 expressions=self._match(TokenType.USING) 799 and self._parse_wrapped_csv(self._parse_id_var), 800 ) 801 if self._match_text_seq("PROJECTION", "POLICY"): 802 policy = self._parse_column() 803 return self.expression( 804 exp.ProjectionPolicyColumnConstraint, 805 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 806 ) 807 if self._match(TokenType.TAG): 808 return self._parse_tag() 809 810 return None 811 812 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 813 if self._match(TokenType.TAG): 814 return self._parse_tag() 815 816 return super()._parse_with_property() 817 818 def _parse_create(self) -> exp.Create | exp.Command: 819 expression = super()._parse_create() 820 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 821 # Replace the Table node with the enclosed Identifier 822 expression.this.replace(expression.this.this) 823 824 return expression 825 826 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 827 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 828 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 829 this = self._parse_var() or self._parse_type() 830 831 if not this: 832 return None 833 834 self._match(TokenType.COMMA) 835 expression = self._parse_bitwise() 836 this = map_date_part(this) 837 name = this.name.upper() 838 839 if name.startswith("EPOCH"): 840 if name == "EPOCH_MILLISECOND": 841 scale = 10**3 842 elif name == "EPOCH_MICROSECOND": 843 scale = 10**6 844 elif name == "EPOCH_NANOSECOND": 845 scale = 10**9 846 else: 847 scale = None 848 849 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 850 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 851 852 if scale: 853 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 854 855 return to_unix 856 857 return self.expression(exp.Extract, this=this, expression=expression) 858 859 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 860 if is_map: 861 # Keys are strings in Snowflake's objects, see also: 862 # - 
https://docs.snowflake.com/en/sql-reference/data-types-semistructured 863 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 864 return self._parse_slice(self._parse_string()) 865 866 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 867 868 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 869 lateral = super()._parse_lateral() 870 if not lateral: 871 return lateral 872 873 if isinstance(lateral.this, exp.Explode): 874 table_alias = lateral.args.get("alias") 875 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 876 if table_alias and not table_alias.args.get("columns"): 877 table_alias.set("columns", columns) 878 elif not table_alias: 879 exp.alias_(lateral, "_flattened", table=columns, copy=False) 880 881 return lateral 882 883 def _parse_table_parts( 884 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 885 ) -> exp.Table: 886 # https://docs.snowflake.com/en/user-guide/querying-stage 887 if self._match(TokenType.STRING, advance=False): 888 table = self._parse_string() 889 elif self._match_text_seq("@", advance=False): 890 table = self._parse_location_path() 891 else: 892 table = None 893 894 if table: 895 file_format = None 896 pattern = None 897 898 wrapped = self._match(TokenType.L_PAREN) 899 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 900 if self._match_text_seq("FILE_FORMAT", "=>"): 901 file_format = self._parse_string() or super()._parse_table_parts( 902 is_db_reference=is_db_reference 903 ) 904 elif self._match_text_seq("PATTERN", "=>"): 905 pattern = self._parse_string() 906 else: 907 break 908 909 self._match(TokenType.COMMA) 910 911 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 912 else: 913 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 914 915 return table 916 917 def _parse_table( 918 self, 919 schema: bool = False, 920 joins: bool = False, 921 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 922 parse_bracket: bool = False, 923 is_db_reference: bool = False, 924 parse_partition: bool = False, 925 consume_pipe: bool = False, 926 ) -> t.Optional[exp.Expression]: 927 table = super()._parse_table( 928 schema=schema, 929 joins=joins, 930 alias_tokens=alias_tokens, 931 parse_bracket=parse_bracket, 932 is_db_reference=is_db_reference, 933 parse_partition=parse_partition, 934 ) 935 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 936 table_from_rows = table.this 937 for arg in exp.TableFromRows.arg_types: 938 if arg != "this": 939 table_from_rows.set(arg, table.args.get(arg)) 940 941 table = table_from_rows 942 943 return table 944 945 def _parse_id_var( 946 self, 947 any_token: bool = True, 948 tokens: t.Optional[t.Collection[TokenType]] = None, 949 ) -> t.Optional[exp.Expression]: 950 if self._match_text_seq("IDENTIFIER", "("): 951 identifier = ( 952 super()._parse_id_var(any_token=any_token, tokens=tokens) 953 or self._parse_string() 954 ) 955 self._match_r_paren() 956 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 957 958 return super()._parse_id_var(any_token=any_token, tokens=tokens) 959 960 def _parse_show_snowflake(self, this: str) -> exp.Show: 961 scope = None 962 scope_kind = None 963 964 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 965 # which is syntactically valid but has no effect on the output 966 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 967 968 
history = self._match_text_seq("HISTORY") 969 970 like = self._parse_string() if self._match(TokenType.LIKE) else None 971 972 if self._match(TokenType.IN): 973 if self._match_text_seq("ACCOUNT"): 974 scope_kind = "ACCOUNT" 975 elif self._match_text_seq("CLASS"): 976 scope_kind = "CLASS" 977 scope = self._parse_table_parts() 978 elif self._match_text_seq("APPLICATION"): 979 scope_kind = "APPLICATION" 980 if self._match_text_seq("PACKAGE"): 981 scope_kind += " PACKAGE" 982 scope = self._parse_table_parts() 983 elif self._match_set(self.DB_CREATABLES): 984 scope_kind = self._prev.text.upper() 985 if self._curr: 986 scope = self._parse_table_parts() 987 elif self._curr: 988 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 989 scope = self._parse_table_parts() 990 991 return self.expression( 992 exp.Show, 993 **{ 994 "terse": terse, 995 "this": this, 996 "history": history, 997 "like": like, 998 "scope": scope, 999 "scope_kind": scope_kind, 1000 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1001 "limit": self._parse_limit(), 1002 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1003 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1004 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1005 }, 1006 ) 1007 1008 def _parse_put(self) -> exp.Put | exp.Command: 1009 if self._curr.token_type != TokenType.STRING: 1010 return self._parse_as_command(self._prev) 1011 1012 return self.expression( 1013 exp.Put, 1014 this=self._parse_string(), 1015 target=self._parse_location_path(), 1016 properties=self._parse_properties(), 1017 ) 1018 1019 def _parse_get(self) -> t.Optional[exp.Expression]: 1020 start = self._prev 1021 1022 # If we detect GET( then we need to parse a function, not a statement 1023 if self._match(TokenType.L_PAREN): 1024 self._retreat(self._index - 2) 1025 return self._parse_expression() 1026 1027 target = self._parse_location_path() 1028 1029 # Parse as command if unquoted file path 1030 if self._curr.token_type == TokenType.URI_START: 1031 return self._parse_as_command(start) 1032 1033 return self.expression( 1034 exp.Get, 1035 this=self._parse_string(), 1036 target=target, 1037 properties=self._parse_properties(), 1038 ) 1039 1040 def _parse_location_property(self) -> exp.LocationProperty: 1041 self._match(TokenType.EQ) 1042 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1043 1044 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1045 # Parse either a subquery or a staged file 1046 return ( 1047 self._parse_select(table=True, parse_subquery_alias=False) 1048 if self._match(TokenType.L_PAREN, advance=False) 1049 else self._parse_table_parts() 1050 ) 1051 1052 def _parse_location_path(self) -> exp.Var: 1053 start = self._curr 1054 self._advance_any(ignore_reserved=True) 1055 1056 # We avoid consuming a comma token because external tables like @foo and @bar 1057 # can be joined in a query with a comma separator, as well as closing paren 1058 # in case of subqueries 1059 while self._is_connected() and not self._match_set( 1060 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1061 ): 1062 self._advance_any(ignore_reserved=True) 1063 1064 return exp.var(self._find_sql(start, self._prev)) 1065 1066 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1067 this = super()._parse_lambda_arg() 1068 1069 if not this: 1070 return this 1071 1072 typ = self._parse_types() 1073 1074 if typ: 1075 return self.expression(exp.Cast, 
this=this, to=typ) 1076 1077 return this 1078 1079 def _parse_foreign_key(self) -> exp.ForeignKey: 1080 # inlineFK, the REFERENCES columns are implied 1081 if self._match(TokenType.REFERENCES, advance=False): 1082 return self.expression(exp.ForeignKey) 1083 1084 # outoflineFK, explicitly names the columns 1085 return super()._parse_foreign_key() 1086 1087 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1088 self._match(TokenType.EQ) 1089 if self._match(TokenType.L_PAREN, advance=False): 1090 expressions = self._parse_wrapped_options() 1091 else: 1092 expressions = [self._parse_format_name()] 1093 1094 return self.expression( 1095 exp.FileFormatProperty, 1096 expressions=expressions, 1097 ) 1098 1099 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1100 return self.expression( 1101 exp.CredentialsProperty, 1102 expressions=self._parse_wrapped_options(), 1103 ) 1104 1105 def _parse_semantic_view(self) -> exp.SemanticView: 1106 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1107 1108 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1109 if self._match_text_seq("DIMENSIONS"): 1110 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1111 if self._match_text_seq("METRICS"): 1112 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1113 if self._match_text_seq("WHERE"): 1114 kwargs["where"] = self._parse_expression() 1115 1116 return self.expression(exp.SemanticView, **kwargs) 1117 1118 class Tokenizer(tokens.Tokenizer): 1119 STRING_ESCAPES = ["\\", "'"] 1120 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1121 RAW_STRINGS = ["$$"] 1122 COMMENTS = ["--", "//", ("/*", "*/")] 1123 NESTED_COMMENTS = False 1124 1125 KEYWORDS = { 1126 **tokens.Tokenizer.KEYWORDS, 1127 "FILE://": TokenType.URI_START, 1128 "BYTEINT": TokenType.INT, 1129 "EXCLUDE": TokenType.EXCEPT, 1130 "FILE FORMAT": TokenType.FILE_FORMAT, 1131 "GET": TokenType.GET, 1132 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1133 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1134 "MINUS": TokenType.EXCEPT, 1135 "NCHAR VARYING": TokenType.VARCHAR, 1136 "PUT": TokenType.PUT, 1137 "REMOVE": TokenType.COMMAND, 1138 "RM": TokenType.COMMAND, 1139 "SAMPLE": TokenType.TABLE_SAMPLE, 1140 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1141 "SQL_DOUBLE": TokenType.DOUBLE, 1142 "SQL_VARCHAR": TokenType.VARCHAR, 1143 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1144 "TAG": TokenType.TAG, 1145 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1146 "TOP": TokenType.TOP, 1147 "WAREHOUSE": TokenType.WAREHOUSE, 1148 "STAGE": TokenType.STAGE, 1149 "STREAMLIT": TokenType.STREAMLIT, 1150 } 1151 KEYWORDS.pop("/*+") 1152 1153 SINGLE_TOKENS = { 1154 **tokens.Tokenizer.SINGLE_TOKENS, 1155 "$": TokenType.PARAMETER, 1156 } 1157 1158 VAR_SINGLE_TOKENS = {"$"} 1159 1160 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1161 1162 class Generator(generator.Generator): 1163 PARAMETER_TOKEN = "$" 1164 MATCHED_BY_SOURCE = False 1165 SINGLE_STRING_INTERVAL = True 1166 JOIN_HINTS = False 1167 TABLE_HINTS = False 1168 QUERY_HINTS = False 1169 AGGREGATE_FILTER_SUPPORTED = False 1170 SUPPORTS_TABLE_COPY = False 1171 COLLATE_IS_FUNC = True 1172 LIMIT_ONLY_LITERALS = True 1173 JSON_KEY_VALUE_PAIR_SEP = "," 1174 INSERT_OVERWRITE = " OVERWRITE INTO" 1175 STRUCT_DELIMITER = ("(", ")") 1176 COPY_PARAMS_ARE_WRAPPED = False 1177 COPY_PARAMS_EQ_REQUIRED = True 1178 STAR_EXCEPT = "EXCLUDE" 1179 SUPPORTS_EXPLODING_PROJECTIONS = False 1180 ARRAY_CONCAT_IS_VAR_LEN = False 1181 SUPPORTS_CONVERT_TIMEZONE = True 
1182 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1183 SUPPORTS_MEDIAN = True 1184 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1185 SUPPORTS_DECODE_CASE = True 1186 IS_BOOL_ALLOWED = False 1187 1188 TRANSFORMS = { 1189 **generator.Generator.TRANSFORMS, 1190 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1191 exp.ArgMax: rename_func("MAX_BY"), 1192 exp.ArgMin: rename_func("MIN_BY"), 1193 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1194 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1195 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1196 exp.AtTimeZone: lambda self, e: self.func( 1197 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1198 ), 1199 exp.BitwiseOr: rename_func("BITOR"), 1200 exp.BitwiseXor: rename_func("BITXOR"), 1201 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1202 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1203 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1204 exp.DateAdd: date_delta_sql("DATEADD"), 1205 exp.DateDiff: date_delta_sql("DATEDIFF"), 1206 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1207 exp.DatetimeDiff: timestampdiff_sql, 1208 exp.DateStrToDate: datestrtodate_sql, 1209 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1210 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1211 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1212 exp.DayOfYear: rename_func("DAYOFYEAR"), 1213 exp.Explode: rename_func("FLATTEN"), 1214 exp.Extract: lambda self, e: self.func( 1215 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1216 ), 1217 exp.FileFormatProperty: lambda self, 1218 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1219 exp.FromTimeZone: lambda self, e: self.func( 1220 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1221 ), 1222 exp.GenerateSeries: lambda self, e: self.func( 1223 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1224 ), 1225 exp.GetExtract: rename_func("GET"), 1226 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1227 exp.If: if_sql(name="IFF", false_value="NULL"), 1228 exp.JSONExtractArray: _json_extract_value_array_sql, 1229 exp.JSONExtractScalar: lambda self, e: self.func( 1230 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1231 ), 1232 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1233 exp.JSONPathRoot: lambda *_: "", 1234 exp.JSONValueArray: _json_extract_value_array_sql, 1235 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1236 rename_func("EDITDISTANCE") 1237 ), 1238 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1239 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1240 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1241 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1242 exp.MakeInterval: no_make_interval_sql, 1243 exp.Max: max_or_greatest, 1244 exp.Min: min_or_least, 1245 exp.ParseJSON: lambda self, e: self.func( 1246 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1247 ), 1248 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1249 exp.PercentileCont: transforms.preprocess( 1250 [transforms.add_within_group_for_percentiles] 1251 ), 1252 exp.PercentileDisc: transforms.preprocess( 1253 [transforms.add_within_group_for_percentiles] 1254 ), 1255 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1256 exp.RegexpExtract: _regexpextract_sql, 1257 exp.RegexpExtractAll: _regexpextract_sql, 1258 
exp.RegexpILike: _regexpilike_sql, 1259 exp.Rand: rename_func("RANDOM"), 1260 exp.Select: transforms.preprocess( 1261 [ 1262 transforms.eliminate_window_clause, 1263 transforms.eliminate_distinct_on, 1264 transforms.explode_projection_to_unnest(), 1265 transforms.eliminate_semi_and_anti_joins, 1266 _transform_generate_date_array, 1267 _qualify_unnested_columns, 1268 _eliminate_dot_variant_lookup, 1269 ] 1270 ), 1271 exp.SHA: rename_func("SHA1"), 1272 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1273 exp.StartsWith: rename_func("STARTSWITH"), 1274 exp.EndsWith: rename_func("ENDSWITH"), 1275 exp.StrPosition: lambda self, e: strposition_sql( 1276 self, e, func_name="CHARINDEX", supports_position=True 1277 ), 1278 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1279 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1280 exp.Stuff: rename_func("INSERT"), 1281 exp.StPoint: rename_func("ST_MAKEPOINT"), 1282 exp.TimeAdd: date_delta_sql("TIMEADD"), 1283 exp.Timestamp: no_timestamp_sql, 1284 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1285 exp.TimestampDiff: lambda self, e: self.func( 1286 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1287 ), 1288 exp.TimestampTrunc: timestamptrunc_sql(), 1289 exp.TimeStrToTime: timestrtotime_sql, 1290 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1291 exp.ToArray: rename_func("TO_ARRAY"), 1292 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1293 exp.ToDouble: rename_func("TO_DOUBLE"), 1294 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1295 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1296 exp.TsOrDsToDate: lambda self, e: self.func( 1297 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1298 ), 1299 exp.TsOrDsToTime: lambda self, e: self.func( 1300 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1301 ), 1302 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1303 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1304 exp.Uuid: rename_func("UUID_STRING"), 1305 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1306 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1307 exp.Xor: rename_func("BOOLXOR"), 1308 } 1309 1310 SUPPORTED_JSON_PATH_PARTS = { 1311 exp.JSONPathKey, 1312 exp.JSONPathRoot, 1313 exp.JSONPathSubscript, 1314 } 1315 1316 TYPE_MAPPING = { 1317 **generator.Generator.TYPE_MAPPING, 1318 exp.DataType.Type.NESTED: "OBJECT", 1319 exp.DataType.Type.STRUCT: "OBJECT", 1320 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1321 } 1322 1323 TOKEN_MAPPING = { 1324 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1325 } 1326 1327 PROPERTIES_LOCATION = { 1328 **generator.Generator.PROPERTIES_LOCATION, 1329 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1330 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1331 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1332 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1333 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1334 } 1335 1336 UNSUPPORTED_VALUES_EXPRESSIONS = { 1337 exp.Map, 1338 exp.StarMap, 1339 exp.Struct, 1340 exp.VarMap, 1341 } 1342 1343 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1344 1345 def with_properties(self, properties: exp.Properties) -> str: 1346 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1347 1348 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1349 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1350 
values_as_table = False 1351 1352 return super().values_sql(expression, values_as_table=values_as_table) 1353 1354 def datatype_sql(self, expression: exp.DataType) -> str: 1355 expressions = expression.expressions 1356 if ( 1357 expressions 1358 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1359 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1360 ): 1361 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1362 return "OBJECT" 1363 1364 return super().datatype_sql(expression) 1365 1366 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1367 return self.func( 1368 "TO_NUMBER", 1369 expression.this, 1370 expression.args.get("format"), 1371 expression.args.get("precision"), 1372 expression.args.get("scale"), 1373 ) 1374 1375 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1376 milli = expression.args.get("milli") 1377 if milli is not None: 1378 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1379 expression.set("nano", milli_to_nano) 1380 1381 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1382 1383 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1384 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1385 return self.func("TO_GEOGRAPHY", expression.this) 1386 if expression.is_type(exp.DataType.Type.GEOMETRY): 1387 return self.func("TO_GEOMETRY", expression.this) 1388 1389 return super().cast_sql(expression, safe_prefix=safe_prefix) 1390 1391 def trycast_sql(self, expression: exp.TryCast) -> str: 1392 value = expression.this 1393 1394 if value.type is None: 1395 from sqlglot.optimizer.annotate_types import annotate_types 1396 1397 value = annotate_types(value, dialect=self.dialect) 1398 1399 # Snowflake requires that TRY_CAST's value be a string 1400 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1401 # if we can deduce that the value is a string, then we can generate TRY_CAST 1402 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1403 return super().trycast_sql(expression) 1404 1405 return self.cast_sql(expression) 1406 1407 def log_sql(self, expression: exp.Log) -> str: 1408 if not expression.expression: 1409 return self.func("LN", expression.this) 1410 1411 return super().log_sql(expression) 1412 1413 def unnest_sql(self, expression: exp.Unnest) -> str: 1414 unnest_alias = expression.args.get("alias") 1415 offset = expression.args.get("offset") 1416 1417 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1418 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1419 1420 columns = [ 1421 exp.to_identifier("seq"), 1422 exp.to_identifier("key"), 1423 exp.to_identifier("path"), 1424 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1425 value, 1426 exp.to_identifier("this"), 1427 ] 1428 1429 if unnest_alias: 1430 unnest_alias.set("columns", columns) 1431 else: 1432 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1433 1434 table_input = self.sql(expression.expressions[0]) 1435 if not table_input.startswith("INPUT =>"): 1436 table_input = f"INPUT => {table_input}" 1437 1438 expression_parent = expression.parent 1439 1440 explode = ( 1441 f"FLATTEN({table_input})" 1442 if isinstance(expression_parent, exp.Lateral) 1443 else f"TABLE(FLATTEN({table_input}))" 1444 ) 1445 alias = self.sql(unnest_alias) 1446 alias = f" AS {alias}" if alias else "" 1447 value = ( 1448 "" 1449 if 
isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1450 else f"{value} FROM " 1451 ) 1452 1453 return f"{value}{explode}{alias}" 1454 1455 def show_sql(self, expression: exp.Show) -> str: 1456 terse = "TERSE " if expression.args.get("terse") else "" 1457 history = " HISTORY" if expression.args.get("history") else "" 1458 like = self.sql(expression, "like") 1459 like = f" LIKE {like}" if like else "" 1460 1461 scope = self.sql(expression, "scope") 1462 scope = f" {scope}" if scope else "" 1463 1464 scope_kind = self.sql(expression, "scope_kind") 1465 if scope_kind: 1466 scope_kind = f" IN {scope_kind}" 1467 1468 starts_with = self.sql(expression, "starts_with") 1469 if starts_with: 1470 starts_with = f" STARTS WITH {starts_with}" 1471 1472 limit = self.sql(expression, "limit") 1473 1474 from_ = self.sql(expression, "from") 1475 if from_: 1476 from_ = f" FROM {from_}" 1477 1478 privileges = self.expressions(expression, key="privileges", flat=True) 1479 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1480 1481 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1482 1483 def describe_sql(self, expression: exp.Describe) -> str: 1484 # Default to table if kind is unknown 1485 kind_value = expression.args.get("kind") or "TABLE" 1486 kind = f" {kind_value}" if kind_value else "" 1487 this = f" {self.sql(expression, 'this')}" 1488 expressions = self.expressions(expression, flat=True) 1489 expressions = f" {expressions}" if expressions else "" 1490 return f"DESCRIBE{kind}{this}{expressions}" 1491 1492 def generatedasidentitycolumnconstraint_sql( 1493 self, expression: exp.GeneratedAsIdentityColumnConstraint 1494 ) -> str: 1495 start = expression.args.get("start") 1496 start = f" START {start}" if start else "" 1497 increment = expression.args.get("increment") 1498 increment = f" INCREMENT {increment}" if increment else "" 1499 1500 order = expression.args.get("order") 1501 if order is not None: 1502 order_clause = " ORDER" if order else " NOORDER" 1503 else: 1504 order_clause = "" 1505 1506 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1507 1508 def cluster_sql(self, expression: exp.Cluster) -> str: 1509 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1510 1511 def struct_sql(self, expression: exp.Struct) -> str: 1512 keys = [] 1513 values = [] 1514 1515 for i, e in enumerate(expression.expressions): 1516 if isinstance(e, exp.PropertyEQ): 1517 keys.append( 1518 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1519 ) 1520 values.append(e.expression) 1521 else: 1522 keys.append(exp.Literal.string(f"_{i}")) 1523 values.append(e) 1524 1525 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1526 1527 @unsupported_args("weight", "accuracy") 1528 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1529 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1530 1531 def alterset_sql(self, expression: exp.AlterSet) -> str: 1532 exprs = self.expressions(expression, flat=True) 1533 exprs = f" {exprs}" if exprs else "" 1534 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1535 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1536 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1537 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1538 tag = self.expressions(expression, 
key="tag", flat=True) 1539 tag = f" TAG {tag}" if tag else "" 1540 1541 return f"SET{exprs}{file_format}{copy_options}{tag}" 1542 1543 def strtotime_sql(self, expression: exp.StrToTime): 1544 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1545 return self.func( 1546 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1547 ) 1548 1549 def timestampsub_sql(self, expression: exp.TimestampSub): 1550 return self.sql( 1551 exp.TimestampAdd( 1552 this=expression.this, 1553 expression=expression.expression * -1, 1554 unit=expression.unit, 1555 ) 1556 ) 1557 1558 def jsonextract_sql(self, expression: exp.JSONExtract): 1559 this = expression.this 1560 1561 # JSON strings are valid coming from other dialects such as BQ so 1562 # for these cases we PARSE_JSON preemptively 1563 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1564 "requires_json" 1565 ): 1566 this = exp.ParseJSON(this=this) 1567 1568 return self.func( 1569 "GET_PATH", 1570 this, 1571 expression.expression, 1572 ) 1573 1574 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1575 this = expression.this 1576 if this.is_string: 1577 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1578 1579 return self.func("TO_CHAR", this, self.format_time(expression)) 1580 1581 def datesub_sql(self, expression: exp.DateSub) -> str: 1582 value = expression.expression 1583 if value: 1584 value.replace(value * (-1)) 1585 else: 1586 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1587 1588 return date_delta_sql("DATEADD")(self, expression) 1589 1590 def select_sql(self, expression: exp.Select) -> str: 1591 limit = expression.args.get("limit") 1592 offset = expression.args.get("offset") 1593 if offset and not limit: 1594 expression.limit(exp.Null(), copy=False) 1595 return super().select_sql(expression) 1596 1597 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1598 is_materialized = expression.find(exp.MaterializedProperty) 1599 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1600 1601 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1602 # For materialized views, COPY GRANTS is located *before* the columns list 1603 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1604 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1605 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1606 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1607 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1608 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1609 1610 this_name = self.sql(expression.this, "this") 1611 copy_grants = self.sql(copy_grants_property) 1612 this_schema = self.schema_columns_sql(expression.this) 1613 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1614 1615 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1616 1617 return super().createable_sql(expression, locations) 1618 1619 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1620 this = expression.this 1621 1622 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1623 # and add it later as part of the WITHIN GROUP clause 1624 order = this if isinstance(this, exp.Order) else None 1625 if order: 1626 expression.set("this", 
order.this.pop()) 1627 1628 expr_sql = super().arrayagg_sql(expression) 1629 1630 if order: 1631 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1632 1633 return expr_sql 1634 1635 def array_sql(self, expression: exp.Array) -> str: 1636 expressions = expression.expressions 1637 1638 first_expr = seq_get(expressions, 0) 1639 if isinstance(first_expr, exp.Select): 1640 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1641 if first_expr.text("kind").upper() == "STRUCT": 1642 object_construct_args = [] 1643 for expr in first_expr.expressions: 1644 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1645 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1646 name = expr.this if isinstance(expr, exp.Alias) else expr 1647 1648 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1649 1650 array_agg = exp.ArrayAgg( 1651 this=_build_object_construct(args=object_construct_args) 1652 ) 1653 1654 first_expr.set("kind", None) 1655 first_expr.set("expressions", [array_agg]) 1656 1657 return self.sql(first_expr.subquery()) 1658 1659 return inline_array_sql(self, expression) 1660 1661 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1662 zone = self.sql(expression, "this") 1663 if not zone: 1664 return super().currentdate_sql(expression) 1665 1666 expr = exp.Cast( 1667 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1668 to=exp.DataType(this=exp.DataType.Type.DATE), 1669 ) 1670 return self.sql(expr) 1671 1672 def dot_sql(self, expression: exp.Dot) -> str: 1673 this = expression.this 1674 1675 if not this.type: 1676 from sqlglot.optimizer.annotate_types import annotate_types 1677 1678 this = annotate_types(this, dialect=self.dialect) 1679 1680 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1681 # Generate colon notation for the top level STRUCT 1682 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1683 1684 return super().dot_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
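As an illustration, a minimal sketch using sqlglot's normalize_identifiers helper (the expected output is an assumption based on the UPPERCASE strategy):

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    # Unquoted identifiers are normalized to uppercase; quoted identifiers
    # keep their exact casing.
    expr = sqlglot.parse_one('SELECT a, "b" FROM t', read="snowflake")
    print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
    # Expected (sketch): SELECT A, "b" FROM T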
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
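The rewrite above can be reproduced with sqlglot's qualify pass; a sketch (note that qualify also applies its usual identifier normalization and quoting, so the output will not match the snippet above character for character):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    expr = qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake")
    # The CTE alias column c overrides the projection alias, so SUM(a)
    # is aliased as c and the HAVING reference resolves.
    print(expr.sql(dialect="snowflake"))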
Associates this dialect's time formats with their equivalent Python strftime formats.
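For instance, the tokens in a Snowflake format string are mapped individually ('YYYY' -> '%Y', 'MM' -> '%m', 'DD' -> '%d', 'HH24' -> '%H', 'MI' -> '%M', 'SS' -> '%S') when transpiling to a dialect that uses strftime-style codes; a sketch with duckdb as an illustrative target:

    import sqlglot

    # The Snowflake format string is translated token by token via TIME_MAPPING.
    print(sqlglot.transpile(
        "SELECT TO_CHAR(col, 'YYYY-MM-DD HH24:MI:SS')",
        read="snowflake",
        write="duckdb",
    )[0])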
532 def quote_identifier(self, expression: E, identify: bool = True) -> E: 533 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 534 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 535 if ( 536 isinstance(expression, exp.Identifier) 537 and isinstance(expression.parent, exp.Table) 538 and expression.name.lower() == "dual" 539 ): 540 return expression # type: ignore 541 542 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
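A small sketch of both behaviors, the DUAL exception and the default quoting (the inline expected values are assumptions):

    import sqlglot
    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()

    # An identifier named DUAL whose parent is a Table is returned untouched...
    query = sqlglot.parse_one("SELECT 1 FROM dual", read="snowflake")
    dual = query.find(exp.Table).this
    print(dialect.quote_identifier(dual).sql(dialect="snowflake"))  # dual

    # ...while any other identifier is quoted, since identify defaults to True.
    print(dialect.quote_identifier(exp.to_identifier("col")).sql(dialect="snowflake"))  # "col"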
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
544 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 545 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 546 SINGLE_TOKENS.pop("$")
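Dropping "$" from SINGLE_TOKENS reflects that Snowflake JSON paths carry no leading "$" root marker; the implicit exp.JSONPathRoot is rendered as an empty string by this dialect's generator (see the TRANSFORMS in the class source above). A rough sketch, assuming the path round-trips:

    import sqlglot

    # GET_PATH parses into exp.JSONExtract; on output the implicit
    # JSONPathRoot renders as "" so no leading "$" appears in the path.
    print(sqlglot.transpile(
        "SELECT GET_PATH(col, 'a.b[0]')",
        read="snowflake",
        write="snowflake",
    )[0])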
self._match_set(self.DB_CREATABLES): 984 scope_kind = self._prev.text.upper() 985 if self._curr: 986 scope = self._parse_table_parts() 987 elif self._curr: 988 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 989 scope = self._parse_table_parts() 990 991 return self.expression( 992 exp.Show, 993 **{ 994 "terse": terse, 995 "this": this, 996 "history": history, 997 "like": like, 998 "scope": scope, 999 "scope_kind": scope_kind, 1000 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1001 "limit": self._parse_limit(), 1002 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1003 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1004 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1005 }, 1006 ) 1007 1008 def _parse_put(self) -> exp.Put | exp.Command: 1009 if self._curr.token_type != TokenType.STRING: 1010 return self._parse_as_command(self._prev) 1011 1012 return self.expression( 1013 exp.Put, 1014 this=self._parse_string(), 1015 target=self._parse_location_path(), 1016 properties=self._parse_properties(), 1017 ) 1018 1019 def _parse_get(self) -> t.Optional[exp.Expression]: 1020 start = self._prev 1021 1022 # If we detect GET( then we need to parse a function, not a statement 1023 if self._match(TokenType.L_PAREN): 1024 self._retreat(self._index - 2) 1025 return self._parse_expression() 1026 1027 target = self._parse_location_path() 1028 1029 # Parse as command if unquoted file path 1030 if self._curr.token_type == TokenType.URI_START: 1031 return self._parse_as_command(start) 1032 1033 return self.expression( 1034 exp.Get, 1035 this=self._parse_string(), 1036 target=target, 1037 properties=self._parse_properties(), 1038 ) 1039 1040 def _parse_location_property(self) -> exp.LocationProperty: 1041 self._match(TokenType.EQ) 1042 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1043 1044 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1045 # Parse either a subquery or a staged file 1046 return ( 1047 self._parse_select(table=True, parse_subquery_alias=False) 1048 if self._match(TokenType.L_PAREN, advance=False) 1049 else self._parse_table_parts() 1050 ) 1051 1052 def _parse_location_path(self) -> exp.Var: 1053 start = self._curr 1054 self._advance_any(ignore_reserved=True) 1055 1056 # We avoid consuming a comma token because external tables like @foo and @bar 1057 # can be joined in a query with a comma separator, as well as closing paren 1058 # in case of subqueries 1059 while self._is_connected() and not self._match_set( 1060 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1061 ): 1062 self._advance_any(ignore_reserved=True) 1063 1064 return exp.var(self._find_sql(start, self._prev)) 1065 1066 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1067 this = super()._parse_lambda_arg() 1068 1069 if not this: 1070 return this 1071 1072 typ = self._parse_types() 1073 1074 if typ: 1075 return self.expression(exp.Cast, this=this, to=typ) 1076 1077 return this 1078 1079 def _parse_foreign_key(self) -> exp.ForeignKey: 1080 # inlineFK, the REFERENCES columns are implied 1081 if self._match(TokenType.REFERENCES, advance=False): 1082 return self.expression(exp.ForeignKey) 1083 1084 # outoflineFK, explicitly names the columns 1085 return super()._parse_foreign_key() 1086 1087 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1088 self._match(TokenType.EQ) 1089 if self._match(TokenType.L_PAREN, advance=False): 1090 
expressions = self._parse_wrapped_options() 1091 else: 1092 expressions = [self._parse_format_name()] 1093 1094 return self.expression( 1095 exp.FileFormatProperty, 1096 expressions=expressions, 1097 ) 1098 1099 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1100 return self.expression( 1101 exp.CredentialsProperty, 1102 expressions=self._parse_wrapped_options(), 1103 ) 1104 1105 def _parse_semantic_view(self) -> exp.SemanticView: 1106 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1107 1108 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1109 if self._match_text_seq("DIMENSIONS"): 1110 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1111 if self._match_text_seq("METRICS"): 1112 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1113 if self._match_text_seq("WHERE"): 1114 kwargs["where"] = self._parse_expression() 1115 1116 return self.expression(exp.SemanticView, **kwargs)
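Two of the Snowflake-specific parse behaviors above can be observed through the public API. The snippet below is an illustrative sketch, not part of the module source; the printed SQL reflects what the code above implies (`_negate_range` rewriting `NOT IN (subquery)`, and staged-file references parsing into tables), but exact output may vary between sqlglot releases.

import sqlglot

# _negate_range: `x NOT IN (subquery)` is parsed as `x <> ALL (subquery)`
print(
    sqlglot.parse_one(
        "SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)", read="snowflake"
    ).sql(dialect="snowflake")
)

# _parse_table_parts: a staged-file reference with a PATTERN option parses
# into an exp.Table node and round-trips back to Snowflake SQL
ast = sqlglot.parse_one("SELECT $1 FROM @my_stage (PATTERN => '.*[.]csv')", read="snowflake")
print(ast.sql(dialect="snowflake"))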
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
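A hedged usage sketch of these arguments through the top-level API; parse_one forwards unrecognized keyword arguments to the Parser, so error_level can be supplied there:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # With error_level=RAISE, up to max_errors messages are collected before raising
    sqlglot.parse_one("SELECT * FROM", read="snowflake", error_level=ErrorLevel.RAISE)
except ParseError as e:
    for error in e.errors:
        print(error["description"])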
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "FILE://": TokenType.URI_START,
        "BYTEINT": TokenType.INT,
        "EXCLUDE": TokenType.EXCEPT,
        "FILE FORMAT": TokenType.FILE_FORMAT,
        "GET": TokenType.GET,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.PUT,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STAGE": TokenType.STAGE,
        "STREAMLIT": TokenType.STREAMLIT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
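The overrides above are observable with sqlglot.tokenize. A small illustrative sketch (not from the source): per RAW_STRINGS and COMMENTS, `$$ ... $$` tokenizes as a raw string with no escape processing, and `//` starts a line comment.

import sqlglot

tokens = sqlglot.tokenize("SELECT $$C:\\path$$ // trailing comment", read="snowflake")
print([(token.token_type, token.text) for token in tokens])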
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_CONVERT_TIMEZONE = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = True
    ARRAY_SIZE_NAME = "ARRAY_SIZE"
    SUPPORTS_DECODE_CASE = True
    IS_BOOL_ALLOWED = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseOr: rename_func("BITOR"),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
        exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
        exp.DatetimeDiff: timestampdiff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: lambda self, e: self.func(
            "DATE_PART", map_date_part(e.this, self.dialect), e.expression
        ),
        exp.FileFormatProperty: lambda self,
        e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GetExtract: rename_func("GET"),
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
            rename_func("EDITDISTANCE")
        ),
        exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.MakeInterval: no_make_interval_sql,
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ParseJSON: lambda self, e: self.func(
            "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
        ),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
        exp.RegexpExtract: _regexpextract_sql,
        exp.RegexpExtractAll: _regexpextract_sql,
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
                _transform_generate_date_array,
                _qualify_unnested_columns,
                _eliminate_dot_variant_lookup,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.EndsWith: rename_func("ENDSWITH"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self, e, func_name="CHARINDEX", supports_position=True
        ),
        exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
        exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
        exp.Stuff: rename_func("INSERT"),
        exp.StPoint: rename_func("ST_MAKEPOINT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.ToDouble: rename_func("TO_DOUBLE"),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.TsOrDsToTime: lambda self, e: self.func(
            "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
        ),
        exp.Unhex: rename_func("HEX_DECODE_BINARY"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.Uuid: rename_func("UUID_STRING"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
        exp.DataType.Type.BIGDECIMAL: "DOUBLE",
    }

    TOKEN_MAPPING = {
        TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
        exp.LocationProperty: exp.Properties.Location.POST_WITH,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value, dialect=self.dialect)

        # Snowflake requires that TRY_CAST's value be a string. If TRY_CAST is being
        # roundtripped (since Snowflake is the only dialect that sets "requires_string"),
        # or if we can deduce that the value is a string, then we can generate TRY_CAST
        if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
            return super().trycast_sql(expression)

        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        unnest_alias_columns = unnest_alias.columns if unnest_alias else []
        value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            value,
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        expression_parent = expression.parent

        explode = (
            f"FLATTEN({table_input})"
            if isinstance(expression_parent, exp.Lateral)
            else f"TABLE(FLATTEN({table_input}))"
        )
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        value = (
            ""
            if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
            else f"{value} FROM "
        )

        return f"{value}{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""

        order = expression.args.get("order")
        if order is not None:
            order_clause = " ORDER" if order else " NOORDER"
        else:
            order_clause = ""

        return f"AUTOINCREMENT{start}{increment}{order_clause}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime):
        safe_prefix = "TRY_" if expression.args.get("safe") else ""
        return self.func(
            f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
        )

    def timestampsub_sql(self, expression: exp.TimestampSub):
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ, so
        # for these cases we PARSE_JSON preemptively
        if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
            "requires_json"
        ):
            this = exp.ParseJSON(this=this)

        return self.func(
            "GET_PATH",
            this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if this.is_string:
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))

    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list,
            # in contrast to normal views, where it is located *after* the columns list.
            # We default CopyGrantsProperty to POST_SCHEMA, so we need to output it
            # POST_NAME when a materialized view is detected.
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql

    def array_sql(self, expression: exp.Array) -> str:
        expressions = expression.expressions

        first_expr = seq_get(expressions, 0)
        if isinstance(first_expr, exp.Select):
            # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
            if first_expr.text("kind").upper() == "STRUCT":
                object_construct_args = []
                for expr in first_expr.expressions:
                    # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                    # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                    name = expr.this if isinstance(expr, exp.Alias) else expr

                    object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                array_agg = exp.ArrayAgg(
                    this=_build_object_construct(args=object_construct_args)
                )

                first_expr.set("kind", None)
                first_expr.set("expressions", [array_agg])

                return self.sql(first_expr.subquery())

        return inline_array_sql(self, expression)

    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        zone = self.sql(expression, "this")
        if not zone:
            return super().currentdate_sql(expression)

        expr = exp.Cast(
            this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
            to=exp.DataType(this=exp.DataType.Type.DATE),
        )
        return self.sql(expr)

    def dot_sql(self, expression: exp.Dot) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
            # Generate colon notation for the top-level STRUCT
            return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

        return super().dot_sql(expression)
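Two of the settings above in action, via the public transpile API. This is an illustrative sketch; the commented output is what the settings imply, though exact formatting may differ slightly between sqlglot versions.

import sqlglot

# STAR_EXCEPT = "EXCLUDE": BigQuery's `* EXCEPT (...)` becomes `* EXCLUDE (...)`
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])
# expected: SELECT * EXCLUDE (a) FROM t

# exp.If is rendered through IFF (with a NULL false branch when one is missing)
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', 'neg')", read="mysql", write="snowflake")[0])
# expected: SELECT IFF(x > 0, 'pos', 'neg')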
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
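A short sketch of how these options are typically supplied; sqlglot.transpile forwards them to the Generator, so there is no need to instantiate it directly:

import sqlglot

sql = "select a, sum(b) from t group by a"
# pretty formats the output, identify always quotes identifiers,
# normalize lowercases unquoted identifiers
print(sqlglot.transpile(sql, write="snowflake", pretty=True, identify=True, normalize=True)[0])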
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql