sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_safe_divide_sql,
    no_timestamp_sql,
    timestampdiff_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

            if kind == exp.DataType.Type.DATE and not int_value:
                formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder
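

# Illustrative sketch (not part of the original module): how the builders above
# behave when parsing Snowflake SQL. The resulting AST shapes follow directly
# from _build_datetime; exact behavior may vary between sqlglot versions.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> # A lone string literal becomes a cast (TRY_CAST when safe=True) ...
#   >>> isinstance(sqlglot.parse_one("TO_DATE('2020-01-01')", read="snowflake"), exp.Cast)
#   True
#   >>> # ... while an integer scale argument yields exp.UnixToTime
#   >>> isinstance(sqlglot.parse_one("TO_TIMESTAMP(col, 3)", read="snowflake"), exp.UnixToTime)
#   True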


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
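

# Illustrative sketch (assumption, not part of the module): the DIV0 / ZEROIFNULL /
# NULLIFZERO builders above rewrite those functions into portable IF expressions,
# so they can be transpiled to dialects that lack them. The output shape below is
# an assumption about how the exp.If node renders elsewhere:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")
#   # -> roughly an IF/CASE form like:
#   # SELECT CASE WHEN b = 0 AND NOT a IS NULL THEN 0 ELSE a / b END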


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll
            # be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
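

# Illustrative sketch (assumption, not part of the module): reading BigQuery's
# GENERATE_DATE_ARRAY and writing Snowflake exercises _transform_generate_date_array
# above, which rewrites the unnested array into ARRAY_GENERATE_RANGE plus a DATEADD
# projection:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile(
#   ...     "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-02-01', INTERVAL 1 DAY))",
#   ...     read="bigquery",
#   ...     write="snowflake",
#   ... )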


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }
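
    # Illustrative sketch (assumption): TIME_MAPPING drives format-string conversion
    # between Snowflake's tokens and sqlglot's strftime-style format, e.g.
    # 'YYYY-MM-DD' <-> '%Y-%m-%d' whenever a TO_CHAR/TO_TIMESTAMP call carries an
    # explicit format argument:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile(
    #   ...     "SELECT TO_TIMESTAMP(x, 'YYYY-MM-DD')", read="snowflake", write="duckdb"
    #   ... )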

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
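
    # Illustrative sketch (assumption): the override above keeps DUAL bare even
    # under identify=True, so normalizing a query like the following should not
    # produce a quoted "dual" table identifier:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT 1 FROM DUAL", read="snowflake", write="snowflake", identify=True)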

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
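
        # Illustrative sketch (assumption): a few of the mappings above in action.
        # ARRAY_CONTAINS swaps its arguments into sqlglot's canonical order, and
        # SQUARE(x) is normalized to a power expression:
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> isinstance(sqlglot.parse_one("SQUARE(x)", read="snowflake"), exp.Pow)
        #   True
        #   >>> ast = sqlglot.parse_one("ARRAY_CONTAINS(v, arr)", read="snowflake")
        #   >>> isinstance(ast, exp.ArrayContains)
        #   True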

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression
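
        # Illustrative sketch (assumption): per _negate_range above, a negated IN
        # over a subquery parses as <> ALL rather than NOT ... IN:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one(
        #   ...     "SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)", read="snowflake"
        #   ... ).sql(dialect="snowflake")
        #   # -> roughly: SELECT * FROM t WHERE x <> ALL (SELECT y FROM u)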

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
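
        # Illustrative sketch (assumption): per _parse_date_part above, epoch parts
        # become unix-time expressions with a unit scale, so DATE_PART(EPOCH_MILLISECOND, col)
        # parses as roughly EXTRACT(epoch_second FROM CAST(col AS TIMESTAMP)) * 1000
        # when generated back out:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT DATE_PART(EPOCH_MILLISECOND, col)", read="snowflake", write="snowflake"
        #   ... )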

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()
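
        # Illustrative sketch (assumption): _parse_show_snowflake above, together with
        # the Generator's show_sql below, should round-trip statements such as:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SHOW TERSE TABLES LIKE 'foo%' IN SCHEMA db.sch",
        #   ...     read="snowflake",
        #   ...     write="snowflake",
        #   ... )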
"WAREHOUSE": TokenType.WAREHOUSE, 851 "STREAMLIT": TokenType.STREAMLIT, 852 } 853 KEYWORDS.pop("/*+") 854 855 SINGLE_TOKENS = { 856 **tokens.Tokenizer.SINGLE_TOKENS, 857 "$": TokenType.PARAMETER, 858 } 859 860 VAR_SINGLE_TOKENS = {"$"} 861 862 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 863 864 class Generator(generator.Generator): 865 PARAMETER_TOKEN = "$" 866 MATCHED_BY_SOURCE = False 867 SINGLE_STRING_INTERVAL = True 868 JOIN_HINTS = False 869 TABLE_HINTS = False 870 QUERY_HINTS = False 871 AGGREGATE_FILTER_SUPPORTED = False 872 SUPPORTS_TABLE_COPY = False 873 COLLATE_IS_FUNC = True 874 LIMIT_ONLY_LITERALS = True 875 JSON_KEY_VALUE_PAIR_SEP = "," 876 INSERT_OVERWRITE = " OVERWRITE INTO" 877 STRUCT_DELIMITER = ("(", ")") 878 COPY_PARAMS_ARE_WRAPPED = False 879 COPY_PARAMS_EQ_REQUIRED = True 880 STAR_EXCEPT = "EXCLUDE" 881 SUPPORTS_EXPLODING_PROJECTIONS = False 882 ARRAY_CONCAT_IS_VAR_LEN = False 883 SUPPORTS_CONVERT_TIMEZONE = True 884 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 885 SUPPORTS_MEDIAN = True 886 ARRAY_SIZE_NAME = "ARRAY_SIZE" 887 888 TRANSFORMS = { 889 **generator.Generator.TRANSFORMS, 890 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 891 exp.ArgMax: rename_func("MAX_BY"), 892 exp.ArgMin: rename_func("MIN_BY"), 893 exp.Array: inline_array_sql, 894 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 895 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 896 exp.AtTimeZone: lambda self, e: self.func( 897 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 898 ), 899 exp.BitwiseOr: rename_func("BITOR"), 900 exp.BitwiseXor: rename_func("BITXOR"), 901 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 902 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 903 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 904 exp.DateAdd: date_delta_sql("DATEADD"), 905 exp.DateDiff: date_delta_sql("DATEDIFF"), 906 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 907 exp.DatetimeDiff: timestampdiff_sql, 908 exp.DateStrToDate: datestrtodate_sql, 909 exp.DayOfMonth: rename_func("DAYOFMONTH"), 910 exp.DayOfWeek: rename_func("DAYOFWEEK"), 911 exp.DayOfYear: rename_func("DAYOFYEAR"), 912 exp.Explode: rename_func("FLATTEN"), 913 exp.Extract: rename_func("DATE_PART"), 914 exp.FromTimeZone: lambda self, e: self.func( 915 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 916 ), 917 exp.GenerateSeries: lambda self, e: self.func( 918 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 919 ), 920 exp.GroupConcat: rename_func("LISTAGG"), 921 exp.If: if_sql(name="IFF", false_value="NULL"), 922 exp.JSONExtractArray: _json_extract_value_array_sql, 923 exp.JSONExtractScalar: lambda self, e: self.func( 924 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 925 ), 926 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 927 exp.JSONPathRoot: lambda *_: "", 928 exp.JSONValueArray: _json_extract_value_array_sql, 929 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 930 exp.LogicalOr: rename_func("BOOLOR_AGG"), 931 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 932 exp.MakeInterval: no_make_interval_sql, 933 exp.Max: max_or_greatest, 934 exp.Min: min_or_least, 935 exp.ParseJSON: lambda self, e: self.func( 936 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 937 ), 938 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 939 exp.PercentileCont: transforms.preprocess( 940 

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }
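
        # Illustrative sketch (assumption): a couple of the TRANSFORMS above in action
        # when generating Snowflake SQL from another dialect:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT IF(c, 1, 2)", read="duckdb", write="snowflake")
        #   # -> roughly: SELECT IFF(c, 1, 2)
        #   >>> sqlglot.transpile("SELECT LEVENSHTEIN(a, b)", read="duckdb", write="snowflake")
        #   # -> roughly: SELECT EDITDISTANCE(a, b)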

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
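
        # Illustrative sketch (assumption): unnest_sql above renders UNNEST via
        # Snowflake's FLATTEN table function, so an UNNEST(arr) source becomes roughly
        # TABLE(FLATTEN(INPUT => arr)) AS _u(seq, key, path, index, value, this).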

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))
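
        # Illustrative sketch (assumption): jsonextract_sql above renders JSON
        # extraction with GET_PATH, parsing string payloads first; a Snowflake
        # round trip is expected to be stable:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("GET_PATH(col, 'a.b')", read="snowflake").sql(dialect="snowflake")
        #   # -> roughly: GET_PATH(col, 'a.b')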

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)
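

# Illustrative sketch (assumption, not part of the module): select_sql above pads
# an OFFSET-only query with LIMIT NULL, since Snowflake appears to require a LIMIT
# clause for OFFSET to be valid:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT x FROM t OFFSET 10", read="duckdb", write="snowflake")
#   # -> roughly: SELECT x FROM t LIMIT NULL OFFSET 10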
name="ARRAY_CAT"), 896 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 897 exp.AtTimeZone: lambda self, e: self.func( 898 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 899 ), 900 exp.BitwiseOr: rename_func("BITOR"), 901 exp.BitwiseXor: rename_func("BITXOR"), 902 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 903 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 904 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 905 exp.DateAdd: date_delta_sql("DATEADD"), 906 exp.DateDiff: date_delta_sql("DATEDIFF"), 907 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 908 exp.DatetimeDiff: timestampdiff_sql, 909 exp.DateStrToDate: datestrtodate_sql, 910 exp.DayOfMonth: rename_func("DAYOFMONTH"), 911 exp.DayOfWeek: rename_func("DAYOFWEEK"), 912 exp.DayOfYear: rename_func("DAYOFYEAR"), 913 exp.Explode: rename_func("FLATTEN"), 914 exp.Extract: rename_func("DATE_PART"), 915 exp.FromTimeZone: lambda self, e: self.func( 916 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 917 ), 918 exp.GenerateSeries: lambda self, e: self.func( 919 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 920 ), 921 exp.GroupConcat: rename_func("LISTAGG"), 922 exp.If: if_sql(name="IFF", false_value="NULL"), 923 exp.JSONExtractArray: _json_extract_value_array_sql, 924 exp.JSONExtractScalar: lambda self, e: self.func( 925 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 926 ), 927 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 928 exp.JSONPathRoot: lambda *_: "", 929 exp.JSONValueArray: _json_extract_value_array_sql, 930 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 931 exp.LogicalOr: rename_func("BOOLOR_AGG"), 932 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 933 exp.MakeInterval: no_make_interval_sql, 934 exp.Max: max_or_greatest, 935 exp.Min: min_or_least, 936 exp.ParseJSON: lambda self, e: self.func( 937 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 938 ), 939 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 940 exp.PercentileCont: transforms.preprocess( 941 [transforms.add_within_group_for_percentiles] 942 ), 943 exp.PercentileDisc: transforms.preprocess( 944 [transforms.add_within_group_for_percentiles] 945 ), 946 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 947 exp.RegexpExtract: _regexpextract_sql, 948 exp.RegexpExtractAll: _regexpextract_sql, 949 exp.RegexpILike: _regexpilike_sql, 950 exp.Rand: rename_func("RANDOM"), 951 exp.Select: transforms.preprocess( 952 [ 953 transforms.eliminate_distinct_on, 954 transforms.explode_to_unnest(), 955 transforms.eliminate_semi_and_anti_joins, 956 _transform_generate_date_array, 957 ] 958 ), 959 exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"), 960 exp.SHA: rename_func("SHA1"), 961 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 962 exp.StartsWith: rename_func("STARTSWITH"), 963 exp.StrPosition: lambda self, e: self.func( 964 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 965 ), 966 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 967 exp.Stuff: rename_func("INSERT"), 968 exp.TimeAdd: date_delta_sql("TIMEADD"), 969 exp.Timestamp: no_timestamp_sql, 970 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 971 exp.TimestampDiff: lambda self, e: self.func( 972 "TIMESTAMPDIFF", e.unit, e.expression, e.this 973 ), 974 exp.TimestampTrunc: timestamptrunc_sql(), 975 exp.TimeStrToTime: timestrtotime_sql, 
976 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 977 exp.ToArray: rename_func("TO_ARRAY"), 978 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 979 exp.ToDouble: rename_func("TO_DOUBLE"), 980 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 981 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 982 exp.TsOrDsToDate: lambda self, e: self.func( 983 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 984 ), 985 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 986 exp.Uuid: rename_func("UUID_STRING"), 987 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 988 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 989 exp.Xor: rename_func("BOOLXOR"), 990 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 991 rename_func("EDITDISTANCE") 992 ), 993 } 994 995 SUPPORTED_JSON_PATH_PARTS = { 996 exp.JSONPathKey, 997 exp.JSONPathRoot, 998 exp.JSONPathSubscript, 999 } 1000 1001 TYPE_MAPPING = { 1002 **generator.Generator.TYPE_MAPPING, 1003 exp.DataType.Type.NESTED: "OBJECT", 1004 exp.DataType.Type.STRUCT: "OBJECT", 1005 } 1006 1007 PROPERTIES_LOCATION = { 1008 **generator.Generator.PROPERTIES_LOCATION, 1009 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1010 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1011 } 1012 1013 UNSUPPORTED_VALUES_EXPRESSIONS = { 1014 exp.Map, 1015 exp.StarMap, 1016 exp.Struct, 1017 exp.VarMap, 1018 } 1019 1020 def with_properties(self, properties: exp.Properties) -> str: 1021 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1022 1023 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1024 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1025 values_as_table = False 1026 1027 return super().values_sql(expression, values_as_table=values_as_table) 1028 1029 def datatype_sql(self, expression: exp.DataType) -> str: 1030 expressions = expression.expressions 1031 if ( 1032 expressions 1033 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1034 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1035 ): 1036 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1037 return "OBJECT" 1038 1039 return super().datatype_sql(expression) 1040 1041 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1042 return self.func( 1043 "TO_NUMBER", 1044 expression.this, 1045 expression.args.get("format"), 1046 expression.args.get("precision"), 1047 expression.args.get("scale"), 1048 ) 1049 1050 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1051 milli = expression.args.get("milli") 1052 if milli is not None: 1053 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1054 expression.set("nano", milli_to_nano) 1055 1056 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1057 1058 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1059 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1060 return self.func("TO_GEOGRAPHY", expression.this) 1061 if expression.is_type(exp.DataType.Type.GEOMETRY): 1062 return self.func("TO_GEOMETRY", expression.this) 1063 1064 return super().cast_sql(expression, safe_prefix=safe_prefix) 1065 1066 def trycast_sql(self, expression: exp.TryCast) -> str: 1067 value = expression.this 1068 1069 if value.type is None: 1070 from sqlglot.optimizer.annotate_types import annotate_types 1071 1072 value = annotate_types(value) 1073 1074 if 
value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1075 return super().trycast_sql(expression) 1076 1077 # TRY_CAST only works for string values in Snowflake 1078 return self.cast_sql(expression) 1079 1080 def log_sql(self, expression: exp.Log) -> str: 1081 if not expression.expression: 1082 return self.func("LN", expression.this) 1083 1084 return super().log_sql(expression) 1085 1086 def unnest_sql(self, expression: exp.Unnest) -> str: 1087 unnest_alias = expression.args.get("alias") 1088 offset = expression.args.get("offset") 1089 1090 columns = [ 1091 exp.to_identifier("seq"), 1092 exp.to_identifier("key"), 1093 exp.to_identifier("path"), 1094 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1095 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1096 or exp.to_identifier("value"), 1097 exp.to_identifier("this"), 1098 ] 1099 1100 if unnest_alias: 1101 unnest_alias.set("columns", columns) 1102 else: 1103 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1104 1105 table_input = self.sql(expression.expressions[0]) 1106 if not table_input.startswith("INPUT =>"): 1107 table_input = f"INPUT => {table_input}" 1108 1109 explode = f"TABLE(FLATTEN({table_input}))" 1110 alias = self.sql(unnest_alias) 1111 alias = f" AS {alias}" if alias else "" 1112 return f"{explode}{alias}" 1113 1114 def show_sql(self, expression: exp.Show) -> str: 1115 terse = "TERSE " if expression.args.get("terse") else "" 1116 history = " HISTORY" if expression.args.get("history") else "" 1117 like = self.sql(expression, "like") 1118 like = f" LIKE {like}" if like else "" 1119 1120 scope = self.sql(expression, "scope") 1121 scope = f" {scope}" if scope else "" 1122 1123 scope_kind = self.sql(expression, "scope_kind") 1124 if scope_kind: 1125 scope_kind = f" IN {scope_kind}" 1126 1127 starts_with = self.sql(expression, "starts_with") 1128 if starts_with: 1129 starts_with = f" STARTS WITH {starts_with}" 1130 1131 limit = self.sql(expression, "limit") 1132 1133 from_ = self.sql(expression, "from") 1134 if from_: 1135 from_ = f" FROM {from_}" 1136 1137 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1138 1139 def describe_sql(self, expression: exp.Describe) -> str: 1140 # Default to table if kind is unknown 1141 kind_value = expression.args.get("kind") or "TABLE" 1142 kind = f" {kind_value}" if kind_value else "" 1143 this = f" {self.sql(expression, 'this')}" 1144 expressions = self.expressions(expression, flat=True) 1145 expressions = f" {expressions}" if expressions else "" 1146 return f"DESCRIBE{kind}{this}{expressions}" 1147 1148 def generatedasidentitycolumnconstraint_sql( 1149 self, expression: exp.GeneratedAsIdentityColumnConstraint 1150 ) -> str: 1151 start = expression.args.get("start") 1152 start = f" START {start}" if start else "" 1153 increment = expression.args.get("increment") 1154 increment = f" INCREMENT {increment}" if increment else "" 1155 return f"AUTOINCREMENT{start}{increment}" 1156 1157 def cluster_sql(self, expression: exp.Cluster) -> str: 1158 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1159 1160 def struct_sql(self, expression: exp.Struct) -> str: 1161 keys = [] 1162 values = [] 1163 1164 for i, e in enumerate(expression.expressions): 1165 if isinstance(e, exp.PropertyEQ): 1166 keys.append( 1167 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1168 ) 1169 values.append(e.expression) 1170 else: 1171 
keys.append(exp.Literal.string(f"_{i}")) 1172 values.append(e) 1173 1174 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1175 1176 @unsupported_args("weight", "accuracy") 1177 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1178 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1179 1180 def alterset_sql(self, expression: exp.AlterSet) -> str: 1181 exprs = self.expressions(expression, flat=True) 1182 exprs = f" {exprs}" if exprs else "" 1183 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1184 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1185 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1186 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1187 tag = self.expressions(expression, key="tag", flat=True) 1188 tag = f" TAG {tag}" if tag else "" 1189 1190 return f"SET{exprs}{file_format}{copy_options}{tag}" 1191 1192 def strtotime_sql(self, expression: exp.StrToTime): 1193 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1194 return self.func( 1195 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1196 ) 1197 1198 def timestampsub_sql(self, expression: exp.TimestampSub): 1199 return self.sql( 1200 exp.TimestampAdd( 1201 this=expression.this, 1202 expression=expression.expression * -1, 1203 unit=expression.unit, 1204 ) 1205 ) 1206 1207 def jsonextract_sql(self, expression: exp.JSONExtract): 1208 this = expression.this 1209 1210 # JSON strings are valid coming from other dialects such as BQ 1211 return self.func( 1212 "GET_PATH", 1213 exp.ParseJSON(this=this) if this.is_string else this, 1214 expression.expression, 1215 ) 1216 1217 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1218 this = expression.this 1219 if not isinstance(this, exp.TsOrDsToTimestamp): 1220 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1221 1222 return self.func("TO_CHAR", this, self.format_time(expression)) 1223 1224 def datesub_sql(self, expression: exp.DateSub) -> str: 1225 value = expression.expression 1226 if value: 1227 value.replace(value * (-1)) 1228 else: 1229 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1230 1231 return date_delta_sql("DATEADD")(self, expression) 1232 1233 def select_sql(self, expression: exp.Select) -> str: 1234 limit = expression.args.get("limit") 1235 offset = expression.args.get("offset") 1236 if offset and not limit: 1237 expression.limit(exp.Null(), copy=False) 1238 return super().select_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
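Snowflake resolves unquoted identifiers by uppercasing them. A minimal sketch of how this strategy surfaces through sqlglot's public optimizer helper (the helper choice is an assumption of this example, not part of this module):

# Hedged sketch: identifier normalization under the Snowflake dialect.
from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

ast = parse_one('SELECT a, "b" FROM t', read="snowflake")
# Unquoted names should be uppercased; the quoted "b" should be left alone.
print(normalize_identifiers(ast, dialect="snowflake").sql("snowflake"))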
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
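A minimal way to observe this rewrite from Python, assuming (as in sqlglot's optimizer) that the override is applied during column qualification; the entry point is an assumption of this sketch and the output is printed rather than asserted:

# Hedged sketch: qualify() is assumed to apply the CTE alias-column
# override described above.
from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))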
Associates this dialect's time formats with their equivalent Python strftime formats.
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
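For instance, forced quoting should wrap ordinary table names but leave DUAL bare, per the special case above (a sketch; the outputs are printed rather than asserted):

# Hedged sketch: quote_identifier special-cases DUAL, so identify=True
# should quote my_table but not dual.
import sqlglot

print(sqlglot.transpile("SELECT 1 FROM dual", read="snowflake", write="snowflake", identify=True)[0])
print(sqlglot.transpile("SELECT 1 FROM my_table", read="snowflake", write="snowflake", identify=True)[0])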
Mapping of an escaped sequence ("\\n") to its unescaped version ("\n").
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
    SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
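The JSONPathTokenizer above removes "$" from the single-character tokens because Snowflake JSON paths such as 'a.b[0]' carry no root marker. A hedged cross-dialect illustration (the DuckDB arrow syntax and the exact Snowflake output are assumptions of this sketch):

# Hedged sketch: a '$'-rooted JSON path from DuckDB should be re-rendered
# for Snowflake without the root marker; printed, not asserted.
import sqlglot

print(sqlglot.transpile("SELECT x -> '$.a.b' FROM t", read="duckdb", write="snowflake")[0])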
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_VARIANT_EXTRACT = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
        "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
        "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
        "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
        "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "EDITDISTANCE": lambda args: exp.Levenshtein(
            this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
        ),
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
        "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TRY_TO_TIMESTAMP": _build_datetime(
            "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
        ),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
        "TAG": lambda self: self._parse_tag(),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _negate_range(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        if not this:
            return this

        query = this.args.get("query")
        if isinstance(this, exp.In) and isinstance(query, exp.Query):
            # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
            # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
            # which can produce different results (most likely a Snowflake bug).
            #
            # https://docs.snowflake.com/en/sql-reference/functions/in
            # Context: https://github.com/tobymao/sqlglot/issues/3890
            return self.expression(
                exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
            )

        return self.expression(exp.Not, this=this)

    def _parse_tag(self) -> exp.Tags:
        return self.expression(
            exp.Tags,
            expressions=self._parse_wrapped_csv(self._parse_property),
        )

    def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
        if self._prev.token_type != TokenType.WITH:
            self._retreat(self._index - 1)

        if self._match_text_seq("MASKING", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.MaskingPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                expressions=self._match(TokenType.USING)
                and self._parse_wrapped_csv(self._parse_id_var),
            )
        if self._match_text_seq("PROJECTION", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.ProjectionPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            )
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return None

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return super()._parse_with_property()

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # inline FK: the REFERENCES columns are implied
        if self._match(TokenType.REFERENCES, advance=False):
            return self.expression(exp.ForeignKey)

        # out-of-line FK: explicitly names the columns
        return super()._parse_foreign_key()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
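As a hedged example of the Snowflake-specific FUNCTIONS mappings above, IFF parses into the dialect-agnostic exp.If and can be re-rendered for another engine (outputs printed, not asserted):

# Hedged sketch: IFF(...) -> exp.If, then regenerated for DuckDB.
import sqlglot

print(sqlglot.transpile("SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake", write="duckdb")[0])
# TO_TIMESTAMP with an integer argument is parsed as a Unix-epoch conversion.
print(repr(sqlglot.parse_one("SELECT TO_TIMESTAMP(1700000000)", read="snowflake")))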
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STREAMLIT": TokenType.STREAMLIT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
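A hedged illustration of two keyword remappings above, MINUS tokenized as EXCEPT and "//" opening a line comment (output printed, not asserted):

# Hedged sketch: MINUS and '//' comments under the Snowflake tokenizer.
import sqlglot

print(sqlglot.transpile("SELECT 1 MINUS SELECT 2 // set difference", read="snowflake", write="duckdb")[0])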
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
865 class Generator(generator.Generator): 866 PARAMETER_TOKEN = "$" 867 MATCHED_BY_SOURCE = False 868 SINGLE_STRING_INTERVAL = True 869 JOIN_HINTS = False 870 TABLE_HINTS = False 871 QUERY_HINTS = False 872 AGGREGATE_FILTER_SUPPORTED = False 873 SUPPORTS_TABLE_COPY = False 874 COLLATE_IS_FUNC = True 875 LIMIT_ONLY_LITERALS = True 876 JSON_KEY_VALUE_PAIR_SEP = "," 877 INSERT_OVERWRITE = " OVERWRITE INTO" 878 STRUCT_DELIMITER = ("(", ")") 879 COPY_PARAMS_ARE_WRAPPED = False 880 COPY_PARAMS_EQ_REQUIRED = True 881 STAR_EXCEPT = "EXCLUDE" 882 SUPPORTS_EXPLODING_PROJECTIONS = False 883 ARRAY_CONCAT_IS_VAR_LEN = False 884 SUPPORTS_CONVERT_TIMEZONE = True 885 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 886 SUPPORTS_MEDIAN = True 887 ARRAY_SIZE_NAME = "ARRAY_SIZE" 888 889 TRANSFORMS = { 890 **generator.Generator.TRANSFORMS, 891 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 892 exp.ArgMax: rename_func("MAX_BY"), 893 exp.ArgMin: rename_func("MIN_BY"), 894 exp.Array: inline_array_sql, 895 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 896 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 897 exp.AtTimeZone: lambda self, e: self.func( 898 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 899 ), 900 exp.BitwiseOr: rename_func("BITOR"), 901 exp.BitwiseXor: rename_func("BITXOR"), 902 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 903 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 904 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 905 exp.DateAdd: date_delta_sql("DATEADD"), 906 exp.DateDiff: date_delta_sql("DATEDIFF"), 907 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 908 exp.DatetimeDiff: timestampdiff_sql, 909 exp.DateStrToDate: datestrtodate_sql, 910 exp.DayOfMonth: rename_func("DAYOFMONTH"), 911 exp.DayOfWeek: rename_func("DAYOFWEEK"), 912 exp.DayOfYear: rename_func("DAYOFYEAR"), 913 exp.Explode: rename_func("FLATTEN"), 914 exp.Extract: rename_func("DATE_PART"), 915 exp.FromTimeZone: lambda self, e: self.func( 916 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 917 ), 918 exp.GenerateSeries: lambda self, e: self.func( 919 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 920 ), 921 exp.GroupConcat: rename_func("LISTAGG"), 922 exp.If: if_sql(name="IFF", false_value="NULL"), 923 exp.JSONExtractArray: _json_extract_value_array_sql, 924 exp.JSONExtractScalar: lambda self, e: self.func( 925 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 926 ), 927 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 928 exp.JSONPathRoot: lambda *_: "", 929 exp.JSONValueArray: _json_extract_value_array_sql, 930 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 931 exp.LogicalOr: rename_func("BOOLOR_AGG"), 932 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 933 exp.MakeInterval: no_make_interval_sql, 934 exp.Max: max_or_greatest, 935 exp.Min: min_or_least, 936 exp.ParseJSON: lambda self, e: self.func( 937 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 938 ), 939 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 940 exp.PercentileCont: transforms.preprocess( 941 [transforms.add_within_group_for_percentiles] 942 ), 943 exp.PercentileDisc: transforms.preprocess( 944 [transforms.add_within_group_for_percentiles] 945 ), 946 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 947 exp.RegexpExtract: _regexpextract_sql, 948 exp.RegexpExtractAll: _regexpextract_sql, 949 
exp.RegexpILike: _regexpilike_sql, 950 exp.Rand: rename_func("RANDOM"), 951 exp.Select: transforms.preprocess( 952 [ 953 transforms.eliminate_distinct_on, 954 transforms.explode_to_unnest(), 955 transforms.eliminate_semi_and_anti_joins, 956 _transform_generate_date_array, 957 ] 958 ), 959 exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"), 960 exp.SHA: rename_func("SHA1"), 961 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 962 exp.StartsWith: rename_func("STARTSWITH"), 963 exp.StrPosition: lambda self, e: self.func( 964 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 965 ), 966 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 967 exp.Stuff: rename_func("INSERT"), 968 exp.TimeAdd: date_delta_sql("TIMEADD"), 969 exp.Timestamp: no_timestamp_sql, 970 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 971 exp.TimestampDiff: lambda self, e: self.func( 972 "TIMESTAMPDIFF", e.unit, e.expression, e.this 973 ), 974 exp.TimestampTrunc: timestamptrunc_sql(), 975 exp.TimeStrToTime: timestrtotime_sql, 976 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 977 exp.ToArray: rename_func("TO_ARRAY"), 978 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 979 exp.ToDouble: rename_func("TO_DOUBLE"), 980 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 981 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 982 exp.TsOrDsToDate: lambda self, e: self.func( 983 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 984 ), 985 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 986 exp.Uuid: rename_func("UUID_STRING"), 987 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 988 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 989 exp.Xor: rename_func("BOOLXOR"), 990 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 991 rename_func("EDITDISTANCE") 992 ), 993 } 994 995 SUPPORTED_JSON_PATH_PARTS = { 996 exp.JSONPathKey, 997 exp.JSONPathRoot, 998 exp.JSONPathSubscript, 999 } 1000 1001 TYPE_MAPPING = { 1002 **generator.Generator.TYPE_MAPPING, 1003 exp.DataType.Type.NESTED: "OBJECT", 1004 exp.DataType.Type.STRUCT: "OBJECT", 1005 } 1006 1007 PROPERTIES_LOCATION = { 1008 **generator.Generator.PROPERTIES_LOCATION, 1009 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1010 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1011 } 1012 1013 UNSUPPORTED_VALUES_EXPRESSIONS = { 1014 exp.Map, 1015 exp.StarMap, 1016 exp.Struct, 1017 exp.VarMap, 1018 } 1019 1020 def with_properties(self, properties: exp.Properties) -> str: 1021 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1022 1023 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1024 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1025 values_as_table = False 1026 1027 return super().values_sql(expression, values_as_table=values_as_table) 1028 1029 def datatype_sql(self, expression: exp.DataType) -> str: 1030 expressions = expression.expressions 1031 if ( 1032 expressions 1033 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1034 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1035 ): 1036 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1037 return "OBJECT" 1038 1039 return super().datatype_sql(expression) 1040 1041 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1042 return self.func( 1043 "TO_NUMBER", 1044 expression.this, 1045 expression.args.get("format"), 1046 
        expression.args.get("precision"),
        expression.args.get("scale"),
    )

def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)

def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)

def log_sql(self, expression: exp.Log) -> str:
    if not expression.expression:
        return self.func("LN", expression.this)

    return super().log_sql(expression)

def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    explode = f"TABLE(FLATTEN({table_input}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"

def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"

def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"

def cluster_sql(self, expression: exp.Cluster) -> str:
    return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

@unsupported_args("weight", "accuracy")
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"

def strtotime_sql(self, expression: exp.StrToTime):
    safe_prefix = "TRY_" if expression.args.get("safe") else ""
    return self.func(
        f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
    )

def timestampsub_sql(self, expression: exp.TimestampSub):
    return self.sql(
        exp.TimestampAdd(
            this=expression.this,
            expression=expression.expression * -1,
            unit=expression.unit,
        )
    )

def jsonextract_sql(self, expression: exp.JSONExtract):
    this = expression.this

    # JSON strings are valid coming from other dialects such as BQ
    return self.func(
        "GET_PATH",
        exp.ParseJSON(this=this) if this.is_string else this,
        expression.expression,
    )

def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    this = expression.this
    if not isinstance(this, exp.TsOrDsToTimestamp):
        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

    return self.func("TO_CHAR", this, self.format_time(expression))

def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)

def select_sql(self, expression: exp.Select) -> str:
    limit = expression.args.get("limit")
    offset = expression.args.get("offset")
    if offset and not limit:
        expression.limit(exp.Null(), copy=False)
    return super().select_sql(expression)
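Snowflake does not allow an OFFSET clause without a LIMIT, which is what the select_sql override above compensates for by padding the query with LIMIT NULL. A minimal sketch of the expected behavior (the input query is illustrative):

import sqlglot

# Postgres accepts a bare OFFSET; Snowflake needs an explicit LIMIT,
# so the generator inserts LIMIT NULL ahead of the OFFSET clause.
print(sqlglot.transpile("SELECT x FROM t OFFSET 10", read="postgres", write="snowflake")[0])
# Expected shape: SELECT x FROM t LIMIT NULL OFFSET 10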
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): never quote, except where the dialect requires it. True or 'always': always quote. 'safe': only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
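These settings are typically passed through the top-level API rather than by constructing a Generator directly; keyword arguments to sqlglot.transpile are forwarded to the generator. A minimal sketch (the query is illustrative):

import sqlglot

sql = sqlglot.transpile(
    "select a, b from my_table",
    write="snowflake",
    pretty=True,                  # format the output
    identify=True,                # quote every identifier
    normalize_functions="lower",  # lowercase function names
)[0]
print(sql)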
def datatype_sql(self, expression: exp.DataType) -> str:
    expressions = expression.expressions
    if (
        expressions
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field_type, exp.DataType) for field_type in expressions)
    ):
        # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
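A struct whose fields are bare types (no keys) cannot be expressed with Snowflake's OBJECT(<key> <value_type>) syntax, so the type degrades to a plain OBJECT. A sketch of the expected behavior (illustrative input):

import sqlglot

# The unnamed field parses as an exp.DataType, triggering the OBJECT fallback.
print(sqlglot.parse_one("CAST(x AS STRUCT<INT>)", read="bigquery").sql("snowflake"))
# Expected shape: CAST(x AS OBJECT)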
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
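Snowflake's TIMESTAMP_FROM_PARTS takes nanoseconds where some dialects supply milliseconds, hence the multiplication by 1000000. A sketch, assuming T-SQL's DATETIMEFROMPARTS parses into exp.TimestampFromParts with a milli argument:

import sqlglot

print(sqlglot.transpile(
    "SELECT DATETIMEFROMPARTS(2024, 1, 1, 12, 30, 0, 500)",
    read="tsql",
    write="snowflake",
)[0])
# Expected shape: SELECT TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 30, 0, 500 * 1000000)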
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
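Snowflake spells these casts as conversion functions, e.g.:

import sqlglot

print(sqlglot.parse_one("CAST(geo AS GEOGRAPHY)").sql("snowflake"))
# Expected shape: TO_GEOGRAPHY(geo)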
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
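A short sketch of the two branches (illustrative literals):

import sqlglot

# The operand is a string, so TRY_CAST is legal in Snowflake and is kept.
print(sqlglot.parse_one("TRY_CAST('10' AS INT)").sql("snowflake"))
# Expected shape: TRY_CAST('10' AS INT)

# The operand is numeric; annotate_types proves it is not text,
# so the expression is demoted to a plain CAST.
print(sqlglot.parse_one("TRY_CAST(10 AS TEXT)").sql("snowflake"))
# Expected shape: CAST(10 AS VARCHAR)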
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    explode = f"TABLE(FLATTEN({table_input}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
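In practice this rewrites, say, BigQuery's UNNEST into Snowflake's TABLE(FLATTEN(...)) form with the fixed six-column alias built above. A sketch (illustrative query; exact output may vary with aliasing):

import sqlglot

print(sqlglot.transpile(
    "SELECT * FROM UNNEST([1, 2, 3])",
    read="bigquery",
    write="snowflake",
)[0])
# Expected shape:
# SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2, 3])) AS _u(seq, key, path, index, value, this)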
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
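This maps ANSI-style identity columns onto Snowflake's AUTOINCREMENT keyword. A sketch of the expected rewrite (illustrative DDL):

import sqlglot

print(sqlglot.transpile(
    "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 1))",
    read="postgres",
    write="snowflake",
)[0])
# Expected shape: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 1)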
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
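Named struct fields become key/value pairs and unnamed fields fall back to positional _<i> keys. A sketch (illustrative input):

import sqlglot

print(sqlglot.parse_one("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery").sql("snowflake"))
# Expected shape: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)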
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
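Snowflake has no DATE_SUB, so the subtracted count is negated in place and the expression is emitted through DATEADD. A sketch (illustrative input; note the count is negated literally, not constant-folded):

import sqlglot

print(sqlglot.transpile(
    "SELECT DATE_SUB(dt, INTERVAL 3 DAY)",
    read="mysql",
    write="snowflake",
)[0])
# Expected shape: SELECT DATEADD(DAY, 3 * -1, dt)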
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql