sqlglot.dialects.snowflake
"""Snowflake dialect for sqlglot.

Defines the tokenizer, parser and SQL generator overrides that map
Snowflake-specific syntax (OBJECT_CONSTRUCT, FLATTEN, staged files,
SHOW commands, TO_TIMESTAMP variants, ...) onto sqlglot's canonical
expression tree and back.
"""

from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    """Return a builder for TO_DATE / TO_TIME / TO_TIMESTAMP-style functions.

    The builder folds literal arguments into casts or dedicated expressions
    (UnixToTime, StrToTime, TsOrDsToDate) where the call shape allows it, and
    falls back to an ``exp.Anonymous`` node named *name* otherwise.
    """

    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Parse OBJECT_CONSTRUCT(...) into a StarMap (for `*`) or a Struct."""
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    """Parse DATEDIFF(unit, start, end) — note Snowflake's argument order."""
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Return a builder for DATEADD/TIMEADD-style functions (unit-first order)."""

    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    """Transpile DIV0(a, b) to IF(b = 0, 0, a / b)."""
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    """Transpile ZEROIFNULL(x) to IF(x IS NULL, 0, x)."""
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    """Transpile NULLIFZERO(x) to IF(x = 0, NULL, x)."""
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Generate REGEXP_LIKE with an 'i' flag to emulate case-insensitive match."""
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    """Parse CONVERT_TIMEZONE; only the 2-arg form maps onto AtTimeZone."""
    if len(args) == 3:
        # The 3-arg (source_tz, target_tz, ts) form has no canonical node
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    """Parse REGEXP_REPLACE, defaulting the replacement to the empty string."""
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Bind arguments for a SHOW <object-kind> parser entry."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    """Parse DATE_TRUNC, normalizing Snowflake's date-part aliases."""
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    """Strip nested-type parameters from CREATE column types, except for Iceberg tables."""
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    """Dialect entry point; see nested Parser/Tokenizer/Generator for overrides."""

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    # Snowflake format tokens -> strftime-style tokens.
    # NOTE(review): "dd" maps to "%-d" while "DD" maps to "%d" — looks
    # intentional upstream, but confirm before relying on lowercase "dd".
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        """Parser overrides for Snowflake-specific syntax."""

        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        # TRIM is parsed as a regular function in Snowflake (see Generator's exp.Trim)
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        # Tokens allowed in a staged-file path (e.g. @stage/dir/file.csv)
        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        # Default column names produced by LATERAL FLATTEN
        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        # SHOW kinds whose IN <scope> defaults to a schema rather than a table
        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                # Lambda params may carry type annotations (parsed as casts); drop them here
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            """Parse [WITH] MASKING POLICY / PROJECTION POLICY / TAG column constraints."""
            if self._prev.token_type != TokenType.WITH:
                # WITH is optional; step back so the keyword itself is re-matched below
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            """Parse CREATE; unwrap non-table creatables (TAG, WAREHOUSE, ...)."""
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            """Parse DATE_PART(part, expr); EPOCH parts become TimeToUnix nodes."""
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                # Sub-second epoch parts are a unix timestamp scaled up
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            """Parse LATERAL; give FLATTEN its implicit output columns/alias."""
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            """Parse table parts, including staged-file references and string tables."""
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            """Parse identifiers, supporting Snowflake's IDENTIFIER(...) wrapper."""
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            """Parse the tail of a SHOW statement (HISTORY, LIKE, IN scope, etc.)."""
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            """Parse ALTER TABLE ... SWAP WITH <table>."""
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            """Parse LOCATION = <stage-path>."""
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            """Consume a staged-file path (e.g. @stage/dir) into a single Var."""
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            """Parse a lambda parameter, wrapping any type annotation as a cast."""
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer overrides for Snowflake-specific lexical syntax."""

        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        # Snowflake has no hint comments; treat /*+ as a plain comment opener
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is fully parsed (see Parser.STATEMENT_PARSERS), not a raw command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        """SQL generator overrides producing Snowflake syntax."""

        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                # Inverse of the parser's end-1 normalization: back to exclusive end
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expression types that cannot appear inside a VALUES-as-table clause
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties unwrapped and space-separated (Snowflake style)."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            """Fall back to SELECT ... UNION when VALUES contains unsupported nodes."""
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render struct types as bare OBJECT when their fields are unnamed."""
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            """Render TO_NUMBER with its optional format/precision/scale arguments."""
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Render TIMESTAMP_FROM_PARTS, converting a milli argument to nanos."""
            milli = expression.args.get("milli")
            if milli is not None:
                # Snowflake only accepts nanoseconds; 1 ms == 1_000_000 ns
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Render TRY_CAST, degrading to CAST for non-string operands."""
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            """Render single-argument LOG as LN (natural log in Snowflake)."""
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as TABLE(FLATTEN(...)) with FLATTEN's output columns."""
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            # FLATTEN always produces these six columns; an OFFSET request maps
            # onto the INDEX position, and a user alias onto the VALUE position
            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render a SHOW statement with all of its optional clauses."""
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            """Render EXCEPT; Snowflake has no EXCEPT ALL."""
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Render INTERSECT; Snowflake has no INTERSECT ALL."""
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render identity columns with Snowflake's AUTOINCREMENT keyword."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            """Render the SWAP WITH clause of ALTER TABLE."""
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            """Render CLUSTER BY with parenthesized keys (Snowflake requires parens)."""
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a Struct as OBJECT_CONSTRUCT(key1, value1, key2, value2, ...)."""
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    # Positional struct fields get synthetic _<index> keys
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            """Render APPROX_PERCENTILE, dropping unsupported weight/accuracy args."""
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render ALTER ... SET with stage file-format/copy-option and TAG clauses."""
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
class Snowflake(Dialect):
    """Dialect definition for Snowflake: normalization, time-format mapping,
    and the nested Parser configuration tables."""

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    # Snowflake format tokens -> strftime-style tokens
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        # Function-name -> AST builder overrides for Snowflake-specific semantics
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            # Snowflake's argument order is (array, value); the AST is (value, array)
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            # Snowflake LEN/LENGTH count bytes for binary input, hence binary=True
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            # MEDIAN(x) is the 0.5 continuous percentile
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        # Snowflake's TRIM takes plain function syntax, not the SQL-standard form
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        # Fixed column names produced by Snowflake's FLATTEN table function
        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                # Strip type-cast wrappers off lambda parameters
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            """Parse [WITH] MASKING POLICY / PROJECTION POLICY / TAG column
            constraints; returns None when nothing matches."""
            # The WITH keyword is optional; back up if we weren't entered via WITH
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None
        def _parse_create(self) -> exp.Create | exp.Command:
            """Parse CREATE; for non-table creatables (WAREHOUSE, TAG, ...) unwrap
            the parsed Table node down to its Identifier."""
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            """Parse DATE_PART(<part>, <expr>); EPOCH* parts become a unix-time
            conversion (scaled for milli/micro/nanoseconds), others an Extract."""
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            """Parse LATERAL; LATERAL FLATTEN(...) gets the fixed FLATTEN output
            column names attached to its (possibly synthesized) alias."""
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            """Parse table parts, additionally supporting staged-file references
            ('path' or @stage) with optional (FILE_FORMAT => ..., PATTERN => ...)."""
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            """Parse an identifier, including Snowflake's IDENTIFIER(<name-or-string>)
            indirection, which is preserved as an Anonymous IDENTIFIER call."""
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
def _parse_show_snowflake(self, this: str) -> exp.Show: 558 scope = None 559 scope_kind = None 560 561 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 562 # which is syntactically valid but has no effect on the output 563 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 564 565 history = self._match_text_seq("HISTORY") 566 567 like = self._parse_string() if self._match(TokenType.LIKE) else None 568 569 if self._match(TokenType.IN): 570 if self._match_text_seq("ACCOUNT"): 571 scope_kind = "ACCOUNT" 572 elif self._match_set(self.DB_CREATABLES): 573 scope_kind = self._prev.text.upper() 574 if self._curr: 575 scope = self._parse_table_parts() 576 elif self._curr: 577 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 578 scope = self._parse_table_parts() 579 580 return self.expression( 581 exp.Show, 582 **{ 583 "terse": terse, 584 "this": this, 585 "history": history, 586 "like": like, 587 "scope": scope, 588 "scope_kind": scope_kind, 589 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 590 "limit": self._parse_limit(), 591 "from": self._parse_string() if self._match(TokenType.FROM) else None, 592 }, 593 ) 594 595 def _parse_alter_table_swap(self) -> exp.SwapTable: 596 self._match_text_seq("WITH") 597 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 598 599 def _parse_location_property(self) -> exp.LocationProperty: 600 self._match(TokenType.EQ) 601 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 602 603 def _parse_file_location(self) -> t.Optional[exp.Expression]: 604 # Parse either a subquery or a staged file 605 return ( 606 self._parse_select(table=True, parse_subquery_alias=False) 607 if self._match(TokenType.L_PAREN, advance=False) 608 else self._parse_table_parts() 609 ) 610 611 def _parse_location_path(self) -> exp.Var: 612 parts = [self._advance_any(ignore_reserved=True)] 613 614 # We avoid consuming a comma token because 
external tables like @foo and @bar 615 # can be joined in a query with a comma separator, as well as closing paren 616 # in case of subqueries 617 while self._is_connected() and not self._match_set( 618 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 619 ): 620 parts.append(self._advance_any(ignore_reserved=True)) 621 622 return exp.var("".join(part.text for part in parts if part)) 623 624 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 625 this = super()._parse_lambda_arg() 626 627 if not this: 628 return this 629 630 typ = self._parse_types() 631 632 if typ: 633 return self.expression(exp.Cast, this=this, to=typ) 634 635 return this 636 637 class Tokenizer(tokens.Tokenizer): 638 STRING_ESCAPES = ["\\", "'"] 639 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 640 RAW_STRINGS = ["$$"] 641 COMMENTS = ["--", "//", ("/*", "*/")] 642 643 KEYWORDS = { 644 **tokens.Tokenizer.KEYWORDS, 645 "BYTEINT": TokenType.INT, 646 "CHAR VARYING": TokenType.VARCHAR, 647 "CHARACTER VARYING": TokenType.VARCHAR, 648 "EXCLUDE": TokenType.EXCEPT, 649 "ILIKE ANY": TokenType.ILIKE_ANY, 650 "LIKE ANY": TokenType.LIKE_ANY, 651 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 652 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 653 "MINUS": TokenType.EXCEPT, 654 "NCHAR VARYING": TokenType.VARCHAR, 655 "PUT": TokenType.COMMAND, 656 "REMOVE": TokenType.COMMAND, 657 "RM": TokenType.COMMAND, 658 "SAMPLE": TokenType.TABLE_SAMPLE, 659 "SQL_DOUBLE": TokenType.DOUBLE, 660 "SQL_VARCHAR": TokenType.VARCHAR, 661 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 662 "TAG": TokenType.TAG, 663 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 664 "TOP": TokenType.TOP, 665 "WAREHOUSE": TokenType.WAREHOUSE, 666 "STREAMLIT": TokenType.STREAMLIT, 667 } 668 KEYWORDS.pop("/*+") 669 670 SINGLE_TOKENS = { 671 **tokens.Tokenizer.SINGLE_TOKENS, 672 "$": TokenType.PARAMETER, 673 } 674 675 VAR_SINGLE_TOKENS = {"$"} 676 677 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 678 679 class 
Generator(generator.Generator): 680 PARAMETER_TOKEN = "$" 681 MATCHED_BY_SOURCE = False 682 SINGLE_STRING_INTERVAL = True 683 JOIN_HINTS = False 684 TABLE_HINTS = False 685 QUERY_HINTS = False 686 AGGREGATE_FILTER_SUPPORTED = False 687 SUPPORTS_TABLE_COPY = False 688 COLLATE_IS_FUNC = True 689 LIMIT_ONLY_LITERALS = True 690 JSON_KEY_VALUE_PAIR_SEP = "," 691 INSERT_OVERWRITE = " OVERWRITE INTO" 692 STRUCT_DELIMITER = ("(", ")") 693 COPY_PARAMS_ARE_WRAPPED = False 694 COPY_PARAMS_EQ_REQUIRED = True 695 STAR_EXCEPT = "EXCLUDE" 696 697 TRANSFORMS = { 698 **generator.Generator.TRANSFORMS, 699 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 700 exp.ArgMax: rename_func("MAX_BY"), 701 exp.ArgMin: rename_func("MIN_BY"), 702 exp.Array: inline_array_sql, 703 exp.ArrayConcat: rename_func("ARRAY_CAT"), 704 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 705 exp.AtTimeZone: lambda self, e: self.func( 706 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 707 ), 708 exp.BitwiseXor: rename_func("BITXOR"), 709 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 710 exp.DateAdd: date_delta_sql("DATEADD"), 711 exp.DateDiff: date_delta_sql("DATEDIFF"), 712 exp.DateStrToDate: datestrtodate_sql, 713 exp.DayOfMonth: rename_func("DAYOFMONTH"), 714 exp.DayOfWeek: rename_func("DAYOFWEEK"), 715 exp.DayOfYear: rename_func("DAYOFYEAR"), 716 exp.Explode: rename_func("FLATTEN"), 717 exp.Extract: rename_func("DATE_PART"), 718 exp.FromTimeZone: lambda self, e: self.func( 719 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 720 ), 721 exp.GenerateSeries: lambda self, e: self.func( 722 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 723 ), 724 exp.GroupConcat: rename_func("LISTAGG"), 725 exp.If: if_sql(name="IFF", false_value="NULL"), 726 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 727 exp.JSONExtractScalar: lambda self, e: self.func( 728 
"JSON_EXTRACT_PATH_TEXT", e.this, e.expression 729 ), 730 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 731 exp.JSONPathRoot: lambda *_: "", 732 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 733 exp.LogicalOr: rename_func("BOOLOR_AGG"), 734 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 735 exp.Max: max_or_greatest, 736 exp.Min: min_or_least, 737 exp.ParseJSON: lambda self, e: self.func( 738 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 739 ), 740 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 741 exp.PercentileCont: transforms.preprocess( 742 [transforms.add_within_group_for_percentiles] 743 ), 744 exp.PercentileDisc: transforms.preprocess( 745 [transforms.add_within_group_for_percentiles] 746 ), 747 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 748 exp.RegexpILike: _regexpilike_sql, 749 exp.Rand: rename_func("RANDOM"), 750 exp.Select: transforms.preprocess( 751 [ 752 transforms.eliminate_distinct_on, 753 transforms.explode_to_unnest(), 754 transforms.eliminate_semi_and_anti_joins, 755 ] 756 ), 757 exp.SHA: rename_func("SHA1"), 758 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 759 exp.StartsWith: rename_func("STARTSWITH"), 760 exp.StrPosition: lambda self, e: self.func( 761 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 762 ), 763 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 764 exp.Stuff: rename_func("INSERT"), 765 exp.TimeAdd: date_delta_sql("TIMEADD"), 766 exp.TimestampDiff: lambda self, e: self.func( 767 "TIMESTAMPDIFF", e.unit, e.expression, e.this 768 ), 769 exp.TimestampTrunc: timestamptrunc_sql(), 770 exp.TimeStrToTime: timestrtotime_sql, 771 exp.TimeToStr: lambda self, e: self.func( 772 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 773 ), 774 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 775 
exp.ToArray: rename_func("TO_ARRAY"), 776 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 777 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 778 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 779 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 780 exp.TsOrDsToDate: lambda self, e: self.func( 781 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 782 ), 783 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 784 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 785 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 786 exp.Xor: rename_func("BOOLXOR"), 787 } 788 789 SUPPORTED_JSON_PATH_PARTS = { 790 exp.JSONPathKey, 791 exp.JSONPathRoot, 792 exp.JSONPathSubscript, 793 } 794 795 TYPE_MAPPING = { 796 **generator.Generator.TYPE_MAPPING, 797 exp.DataType.Type.NESTED: "OBJECT", 798 exp.DataType.Type.STRUCT: "OBJECT", 799 } 800 801 PROPERTIES_LOCATION = { 802 **generator.Generator.PROPERTIES_LOCATION, 803 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 804 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 805 } 806 807 UNSUPPORTED_VALUES_EXPRESSIONS = { 808 exp.Map, 809 exp.StarMap, 810 exp.Struct, 811 exp.VarMap, 812 } 813 814 def with_properties(self, properties: exp.Properties) -> str: 815 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 816 817 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 818 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 819 values_as_table = False 820 821 return super().values_sql(expression, values_as_table=values_as_table) 822 823 def datatype_sql(self, expression: exp.DataType) -> str: 824 expressions = expression.expressions 825 if ( 826 expressions 827 and expression.is_type(*exp.DataType.STRUCT_TYPES) 828 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 829 ): 830 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 831 return "OBJECT" 
832 833 return super().datatype_sql(expression) 834 835 def tonumber_sql(self, expression: exp.ToNumber) -> str: 836 return self.func( 837 "TO_NUMBER", 838 expression.this, 839 expression.args.get("format"), 840 expression.args.get("precision"), 841 expression.args.get("scale"), 842 ) 843 844 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 845 milli = expression.args.get("milli") 846 if milli is not None: 847 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 848 expression.set("nano", milli_to_nano) 849 850 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 851 852 def trycast_sql(self, expression: exp.TryCast) -> str: 853 value = expression.this 854 855 if value.type is None: 856 from sqlglot.optimizer.annotate_types import annotate_types 857 858 value = annotate_types(value) 859 860 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 861 return super().trycast_sql(expression) 862 863 # TRY_CAST only works for string values in Snowflake 864 return self.cast_sql(expression) 865 866 def log_sql(self, expression: exp.Log) -> str: 867 if not expression.expression: 868 return self.func("LN", expression.this) 869 870 return super().log_sql(expression) 871 872 def unnest_sql(self, expression: exp.Unnest) -> str: 873 unnest_alias = expression.args.get("alias") 874 offset = expression.args.get("offset") 875 876 columns = [ 877 exp.to_identifier("seq"), 878 exp.to_identifier("key"), 879 exp.to_identifier("path"), 880 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 881 seq_get(unnest_alias.columns if unnest_alias else [], 0) 882 or exp.to_identifier("value"), 883 exp.to_identifier("this"), 884 ] 885 886 if unnest_alias: 887 unnest_alias.set("columns", columns) 888 else: 889 unnest_alias = exp.TableAlias(this="_u", columns=columns) 890 891 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 892 alias = self.sql(unnest_alias) 893 alias = f" AS {alias}" 
if alias else "" 894 return f"{explode}{alias}" 895 896 def show_sql(self, expression: exp.Show) -> str: 897 terse = "TERSE " if expression.args.get("terse") else "" 898 history = " HISTORY" if expression.args.get("history") else "" 899 like = self.sql(expression, "like") 900 like = f" LIKE {like}" if like else "" 901 902 scope = self.sql(expression, "scope") 903 scope = f" {scope}" if scope else "" 904 905 scope_kind = self.sql(expression, "scope_kind") 906 if scope_kind: 907 scope_kind = f" IN {scope_kind}" 908 909 starts_with = self.sql(expression, "starts_with") 910 if starts_with: 911 starts_with = f" STARTS WITH {starts_with}" 912 913 limit = self.sql(expression, "limit") 914 915 from_ = self.sql(expression, "from") 916 if from_: 917 from_ = f" FROM {from_}" 918 919 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 920 921 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 922 # Other dialects don't support all of the following parameters, so we need to 923 # generate default values as necessary to ensure the transpilation is correct 924 group = expression.args.get("group") 925 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 926 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 927 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 928 929 return self.func( 930 "REGEXP_SUBSTR", 931 expression.this, 932 expression.expression, 933 position, 934 occurrence, 935 parameters, 936 group, 937 ) 938 939 def except_op(self, expression: exp.Except) -> str: 940 if not expression.args.get("distinct"): 941 self.unsupported("EXCEPT with All is not supported in Snowflake") 942 return super().except_op(expression) 943 944 def intersect_op(self, expression: exp.Intersect) -> str: 945 if not expression.args.get("distinct"): 946 self.unsupported("INTERSECT with All is not supported in Snowflake") 947 
return super().intersect_op(expression) 948 949 def describe_sql(self, expression: exp.Describe) -> str: 950 # Default to table if kind is unknown 951 kind_value = expression.args.get("kind") or "TABLE" 952 kind = f" {kind_value}" if kind_value else "" 953 this = f" {self.sql(expression, 'this')}" 954 expressions = self.expressions(expression, flat=True) 955 expressions = f" {expressions}" if expressions else "" 956 return f"DESCRIBE{kind}{this}{expressions}" 957 958 def generatedasidentitycolumnconstraint_sql( 959 self, expression: exp.GeneratedAsIdentityColumnConstraint 960 ) -> str: 961 start = expression.args.get("start") 962 start = f" START {start}" if start else "" 963 increment = expression.args.get("increment") 964 increment = f" INCREMENT {increment}" if increment else "" 965 return f"AUTOINCREMENT{start}{increment}" 966 967 def swaptable_sql(self, expression: exp.SwapTable) -> str: 968 this = self.sql(expression, "this") 969 return f"SWAP WITH {this}" 970 971 def cluster_sql(self, expression: exp.Cluster) -> str: 972 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 973 974 def struct_sql(self, expression: exp.Struct) -> str: 975 keys = [] 976 values = [] 977 978 for i, e in enumerate(expression.expressions): 979 if isinstance(e, exp.PropertyEQ): 980 keys.append( 981 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 982 ) 983 values.append(e.expression) 984 else: 985 keys.append(exp.Literal.string(f"_{i}")) 986 values.append(e) 987 988 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 989 990 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 991 if expression.args.get("weight") or expression.args.get("accuracy"): 992 self.unsupported( 993 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 994 ) 995 996 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 997 998 def alterset_sql(self, expression: 
exp.AlterSet) -> str: 999 exprs = self.expressions(expression, flat=True) 1000 exprs = f" {exprs}" if exprs else "" 1001 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1002 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1003 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1004 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1005 tag = self.expressions(expression, key="tag", flat=True) 1006 tag = f" TAG {tag}" if tag else "" 1007 1008 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
- identify: If set to `False`, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (e.g. `\n`) to its unescaped version (e.g. the newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_VARIANT_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 288 "LISTAGG": exp.GroupConcat.from_arg_list, 289 "MEDIAN": lambda args: exp.PercentileCont( 290 this=seq_get(args, 0), 
expression=exp.Literal.number(0.5) 291 ), 292 "NULLIFZERO": _build_if_from_nullifzero, 293 "OBJECT_CONSTRUCT": _build_object_construct, 294 "REGEXP_REPLACE": _build_regexp_replace, 295 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 296 "RLIKE": exp.RegexpLike.from_arg_list, 297 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 298 "TIMEADD": _build_date_time_add(exp.TimeAdd), 299 "TIMEDIFF": _build_datediff, 300 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 301 "TIMESTAMPDIFF": _build_datediff, 302 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 303 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 304 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 305 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 306 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 307 "TO_NUMBER": lambda args: exp.ToNumber( 308 this=seq_get(args, 0), 309 format=seq_get(args, 1), 310 precision=seq_get(args, 2), 311 scale=seq_get(args, 3), 312 ), 313 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 314 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 316 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 317 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 318 "TO_VARCHAR": exp.ToChar.from_arg_list, 319 "ZEROIFNULL": _build_if_from_zeroifnull, 320 } 321 322 FUNCTION_PARSERS = { 323 **parser.Parser.FUNCTION_PARSERS, 324 "DATE_PART": lambda self: self._parse_date_part(), 325 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 326 } 327 FUNCTION_PARSERS.pop("TRIM") 328 329 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 330 331 RANGE_PARSERS = { 332 **parser.Parser.RANGE_PARSERS, 333 TokenType.LIKE_ANY: 
parser.binary_range_parser(exp.LikeAny), 334 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 335 } 336 337 ALTER_PARSERS = { 338 **parser.Parser.ALTER_PARSERS, 339 "UNSET": lambda self: self.expression( 340 exp.Set, 341 tag=self._match_text_seq("TAG"), 342 expressions=self._parse_csv(self._parse_id_var), 343 unset=True, 344 ), 345 "SWAP": lambda self: self._parse_alter_table_swap(), 346 } 347 348 STATEMENT_PARSERS = { 349 **parser.Parser.STATEMENT_PARSERS, 350 TokenType.SHOW: lambda self: self._parse_show(), 351 } 352 353 PROPERTY_PARSERS = { 354 **parser.Parser.PROPERTY_PARSERS, 355 "LOCATION": lambda self: self._parse_location_property(), 356 } 357 358 TYPE_CONVERTERS = { 359 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 360 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 361 } 362 363 SHOW_PARSERS = { 364 "SCHEMAS": _show_parser("SCHEMAS"), 365 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 366 "OBJECTS": _show_parser("OBJECTS"), 367 "TERSE OBJECTS": _show_parser("OBJECTS"), 368 "TABLES": _show_parser("TABLES"), 369 "TERSE TABLES": _show_parser("TABLES"), 370 "VIEWS": _show_parser("VIEWS"), 371 "TERSE VIEWS": _show_parser("VIEWS"), 372 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 374 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 376 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 378 "SEQUENCES": _show_parser("SEQUENCES"), 379 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 380 "COLUMNS": _show_parser("COLUMNS"), 381 "USERS": _show_parser("USERS"), 382 "TERSE USERS": _show_parser("USERS"), 383 } 384 385 CONSTRAINT_PARSERS = { 386 **parser.Parser.CONSTRAINT_PARSERS, 387 "WITH": lambda self: self._parse_with_constraint(), 388 "MASKING": lambda self: self._parse_with_constraint(), 389 "PROJECTION": lambda self: 
self._parse_with_constraint(), 390 "TAG": lambda self: self._parse_with_constraint(), 391 } 392 393 STAGED_FILE_SINGLE_TOKENS = { 394 TokenType.DOT, 395 TokenType.MOD, 396 TokenType.SLASH, 397 } 398 399 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 400 401 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 402 403 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 404 405 LAMBDAS = { 406 **parser.Parser.LAMBDAS, 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_assignment(), 411 expressions, 412 ), 413 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 414 ), 415 } 416 417 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 418 if self._prev.token_type != TokenType.WITH: 419 self._retreat(self._index - 1) 420 421 if self._match_text_seq("MASKING", "POLICY"): 422 policy = self._parse_column() 423 return self.expression( 424 exp.MaskingPolicyColumnConstraint, 425 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 426 expressions=self._match(TokenType.USING) 427 and self._parse_wrapped_csv(self._parse_id_var), 428 ) 429 if self._match_text_seq("PROJECTION", "POLICY"): 430 policy = self._parse_column() 431 return self.expression( 432 exp.ProjectionPolicyColumnConstraint, 433 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 434 ) 435 if self._match(TokenType.TAG): 436 return self.expression( 437 exp.TagColumnConstraint, 438 expressions=self._parse_wrapped_csv(self._parse_property), 439 ) 440 441 return None 442 443 def _parse_create(self) -> exp.Create | exp.Command: 444 expression = super()._parse_create() 445 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 446 # Replace the Table node with the enclosed Identifier 447 expression.this.replace(expression.this.this) 448 449 return expression 450 
451 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 452 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 453 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 454 this = self._parse_var() or self._parse_type() 455 456 if not this: 457 return None 458 459 self._match(TokenType.COMMA) 460 expression = self._parse_bitwise() 461 this = map_date_part(this) 462 name = this.name.upper() 463 464 if name.startswith("EPOCH"): 465 if name == "EPOCH_MILLISECOND": 466 scale = 10**3 467 elif name == "EPOCH_MICROSECOND": 468 scale = 10**6 469 elif name == "EPOCH_NANOSECOND": 470 scale = 10**9 471 else: 472 scale = None 473 474 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 475 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 476 477 if scale: 478 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 479 480 return to_unix 481 482 return self.expression(exp.Extract, this=this, expression=expression) 483 484 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 485 if is_map: 486 # Keys are strings in Snowflake's objects, see also: 487 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 488 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 489 return self._parse_slice(self._parse_string()) 490 491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 492 493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 494 lateral = super()._parse_lateral() 495 if not lateral: 496 return lateral 497 498 if isinstance(lateral.this, exp.Explode): 499 table_alias = lateral.args.get("alias") 500 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 501 if table_alias and not table_alias.args.get("columns"): 502 table_alias.set("columns", columns) 503 elif not table_alias: 504 exp.alias_(lateral, 
"_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a table reference, including Snowflake staged-file locations.

        A string literal or an @-prefixed stage path can act as a table source,
        optionally followed by a parenthesized option list such as
        (FILE_FORMAT => ..., PATTERN => ...); anything else is delegated to the
        base parser.
        """
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            # Options only follow when wrapped in parens, e.g.
            # @stage (FILE_FORMAT => 'fmt', PATTERN => '.*csv')
            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    # FILE_FORMAT may be a string literal or a (possibly
                    # db-qualified) format name
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, unwrapping Snowflake's IDENTIFIER(...) syntax.

        IDENTIFIER(<name-or-string>) is preserved as an Anonymous function node
        so it round-trips through generation.
        """
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        """Parse the tail of a Snowflake SHOW command into an exp.Show node.

        Args:
            this: The SHOW target already consumed (e.g. "TABLES", "SCHEMAS").
        """
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
        elif self._curr:
            # No explicit IN <kind>: infer the scope kind from the SHOW target
            scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
            scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        """Parse ALTER TABLE ... SWAP WITH <table>."""
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location_property(self) -> exp.LocationProperty:
        """Parse a LOCATION = <stage path> property."""
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        """Consume the raw tokens of a stage path (e.g. @db.schema.stage/dir)
        and join them into a single Var node."""
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument, wrapping it in a Cast when a type follows
        (Snowflake allows typed lambda parameters, e.g. `x INT -> x + 1`)."""
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
637 class Tokenizer(tokens.Tokenizer): 638 STRING_ESCAPES = ["\\", "'"] 639 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 640 RAW_STRINGS = ["$$"] 641 COMMENTS = ["--", "//", ("/*", "*/")] 642 643 KEYWORDS = { 644 **tokens.Tokenizer.KEYWORDS, 645 "BYTEINT": TokenType.INT, 646 "CHAR VARYING": TokenType.VARCHAR, 647 "CHARACTER VARYING": TokenType.VARCHAR, 648 "EXCLUDE": TokenType.EXCEPT, 649 "ILIKE ANY": TokenType.ILIKE_ANY, 650 "LIKE ANY": TokenType.LIKE_ANY, 651 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 652 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 653 "MINUS": TokenType.EXCEPT, 654 "NCHAR VARYING": TokenType.VARCHAR, 655 "PUT": TokenType.COMMAND, 656 "REMOVE": TokenType.COMMAND, 657 "RM": TokenType.COMMAND, 658 "SAMPLE": TokenType.TABLE_SAMPLE, 659 "SQL_DOUBLE": TokenType.DOUBLE, 660 "SQL_VARCHAR": TokenType.VARCHAR, 661 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 662 "TAG": TokenType.TAG, 663 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 664 "TOP": TokenType.TOP, 665 "WAREHOUSE": TokenType.WAREHOUSE, 666 "STREAMLIT": TokenType.STREAMLIT, 667 } 668 KEYWORDS.pop("/*+") 669 670 SINGLE_TOKENS = { 671 **tokens.Tokenizer.SINGLE_TOKENS, 672 "$": TokenType.PARAMETER, 673 } 674 675 VAR_SINGLE_TOKENS = {"$"} 676 677 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Generator(generator.Generator):
        """Generates Snowflake SQL text from a sqlglot syntax tree."""

        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        # Per-expression SQL renderers; entries here override the base generator.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            # Snowflake's ARRAY_CONTAINS takes (value, array) — args swapped
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            # ARRAY_GENERATE_RANGE's upper bound is exclusive, hence end + 1
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            # TIMESTAMPDIFF takes (unit, start, end), so the operands are swapped
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # A VALUES clause containing any of these cannot be rendered as a
        # VALUES table and must be inlined instead (see values_sql)
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            """Render table properties unwrapped and space-separated."""
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            """Render VALUES, inlining it when it contains unsupported expressions."""
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type, collapsing typed structs into plain OBJECT."""
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            """Render TO_NUMBER(value[, format[, precision[, scale]]])."""
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Render TIMESTAMP_FROM_PARTS, folding a `milli` arg into nanoseconds
            since Snowflake only accepts a nanosecond component."""
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Render TRY_CAST, falling back to CAST for non-string operands
            because Snowflake's TRY_CAST only accepts string inputs."""
            value = expression.this

            if value.type is None:
                # Imported lazily to avoid a circular dependency with the optimizer
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            """Render single-argument LOG as the natural log LN."""
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as TABLE(FLATTEN(INPUT => ...)) with an alias
            mapping FLATTEN's fixed output columns (SEQ, KEY, PATH, INDEX,
            VALUE, THIS); a user alias column replaces VALUE and an explicit
            offset replaces INDEX."""
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            """Render a Snowflake SHOW command from an exp.Show node."""
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            """Render REGEXP_SUBSTR, filling in default values for any optional
            argument that precedes a supplied one (self.func drops trailing
            Nones, so the cascade keeps argument positions aligned)."""
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            """Render EXCEPT; Snowflake only supports the DISTINCT form."""
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Render INTERSECT; Snowflake only supports the DISTINCT form."""
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render DESCRIBE, defaulting the kind to TABLE."""
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            # NOTE(review): kind_value is always truthy here (falls back to
            # "TABLE"), so the else branch below is dead
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity column as AUTOINCREMENT [START n] [INCREMENT n]."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            """Render the SWAP WITH clause of ALTER TABLE."""
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            """Render CLUSTER BY with its keys wrapped in parentheses."""
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct as OBJECT_CONSTRUCT(k1, v1, ...); positional
            fields get synthesized "_<index>" keys."""
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            """Render APPROX_PERCENTILE; weight/accuracy have no Snowflake
            equivalent and are dropped with a warning."""
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render ALTER ... SET with optional stage file format, stage copy
            options and tag assignments."""
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
823 def datatype_sql(self, expression: exp.DataType) -> str: 824 expressions = expression.expressions 825 if ( 826 expressions 827 and expression.is_type(*exp.DataType.STRUCT_TYPES) 828 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 829 ): 830 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 831 return "OBJECT" 832 833 return super().datatype_sql(expression)
844 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 845 milli = expression.args.get("milli") 846 if milli is not None: 847 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 848 expression.set("nano", milli_to_nano) 849 850 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
852 def trycast_sql(self, expression: exp.TryCast) -> str: 853 value = expression.this 854 855 if value.type is None: 856 from sqlglot.optimizer.annotate_types import annotate_types 857 858 value = annotate_types(value) 859 860 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 861 return super().trycast_sql(expression) 862 863 # TRY_CAST only works for string values in Snowflake 864 return self.cast_sql(expression)
872 def unnest_sql(self, expression: exp.Unnest) -> str: 873 unnest_alias = expression.args.get("alias") 874 offset = expression.args.get("offset") 875 876 columns = [ 877 exp.to_identifier("seq"), 878 exp.to_identifier("key"), 879 exp.to_identifier("path"), 880 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 881 seq_get(unnest_alias.columns if unnest_alias else [], 0) 882 or exp.to_identifier("value"), 883 exp.to_identifier("this"), 884 ] 885 886 if unnest_alias: 887 unnest_alias.set("columns", columns) 888 else: 889 unnest_alias = exp.TableAlias(this="_u", columns=columns) 890 891 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 892 alias = self.sql(unnest_alias) 893 alias = f" AS {alias}" if alias else "" 894 return f"{explode}{alias}"
896 def show_sql(self, expression: exp.Show) -> str: 897 terse = "TERSE " if expression.args.get("terse") else "" 898 history = " HISTORY" if expression.args.get("history") else "" 899 like = self.sql(expression, "like") 900 like = f" LIKE {like}" if like else "" 901 902 scope = self.sql(expression, "scope") 903 scope = f" {scope}" if scope else "" 904 905 scope_kind = self.sql(expression, "scope_kind") 906 if scope_kind: 907 scope_kind = f" IN {scope_kind}" 908 909 starts_with = self.sql(expression, "starts_with") 910 if starts_with: 911 starts_with = f" STARTS WITH {starts_with}" 912 913 limit = self.sql(expression, "limit") 914 915 from_ = self.sql(expression, "from") 916 if from_: 917 from_ = f" FROM {from_}" 918 919 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
921 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 922 # Other dialects don't support all of the following parameters, so we need to 923 # generate default values as necessary to ensure the transpilation is correct 924 group = expression.args.get("group") 925 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 926 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 927 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 928 929 return self.func( 930 "REGEXP_SUBSTR", 931 expression.this, 932 expression.expression, 933 position, 934 occurrence, 935 parameters, 936 group, 937 )
949 def describe_sql(self, expression: exp.Describe) -> str: 950 # Default to table if kind is unknown 951 kind_value = expression.args.get("kind") or "TABLE" 952 kind = f" {kind_value}" if kind_value else "" 953 this = f" {self.sql(expression, 'this')}" 954 expressions = self.expressions(expression, flat=True) 955 expressions = f" {expressions}" if expressions else "" 956 return f"DESCRIBE{kind}{this}{expressions}"
958 def generatedasidentitycolumnconstraint_sql( 959 self, expression: exp.GeneratedAsIdentityColumnConstraint 960 ) -> str: 961 start = expression.args.get("start") 962 start = f" START {start}" if start else "" 963 increment = expression.args.get("increment") 964 increment = f" INCREMENT {increment}" if increment else "" 965 return f"AUTOINCREMENT{start}{increment}"
974 def struct_sql(self, expression: exp.Struct) -> str: 975 keys = [] 976 values = [] 977 978 for i, e in enumerate(expression.expressions): 979 if isinstance(e, exp.PropertyEQ): 980 keys.append( 981 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 982 ) 983 values.append(e.expression) 984 else: 985 keys.append(exp.Literal.string(f"_{i}")) 986 values.append(e) 987 988 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
990 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 991 if expression.args.get("weight") or expression.args.get("accuracy"): 992 self.unsupported( 993 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 994 ) 995 996 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
998 def alterset_sql(self, expression: exp.AlterSet) -> str: 999 exprs = self.expressions(expression, flat=True) 1000 exprs = f" {exprs}" if exprs else "" 1001 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1002 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1003 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1004 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1005 tag = self.expressions(expression, key="tag", flat=True) 1006 tag = f" TAG {tag}" if tag else "" 1007 1008 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql
- pad_sql