sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder
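
# Illustrative note (not part of the module): the builder above is what makes
# Snowflake's TO_* conversion functions transpilable. Under the parser below, e.g.
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')", read="snowflake")
#
# yields an exp.StrToTime (the format is normalized via TIME_MAPPING), while an
# integer argument such as TO_TIMESTAMP(1704067200) becomes exp.UnixToTime.
# Exact trees depend on the sqlglot version.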


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)
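
# Hedged usage sketch: DIV0 has no direct equivalent in most dialects, so the
# builder above parses it into an exp.If, which other generators render as a
# conditional, e.g.
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")[0]
#
# should produce a CASE expression shaped like
# `CASE WHEN b = 0 AND NOT a IS NULL THEN 0 ELSE a / b END`.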


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            field = expression.args.get("field")
            field_expr = seq_get(field.expressions if field else [], 0)

            if isinstance(field_expr, exp.PivotAny):
                unqualified_field_expr = transforms.unqualify_columns(field_expr)
                t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
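
# Hedged example of the rewrite above: Snowflake has no GENERATE_DATE_ARRAY, so a
# query such as the BigQuery one below is rewritten into ARRAY_GENERATE_RANGE plus
# a DATEADD projection over the flattened sequence (shape only, version-dependent):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-01-05'))",
#     ...     read="bigquery",
#     ...     write="snowflake",
#     ... )[0]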


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
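
# Why the default-value chain in _regexpextract_sql matters: REGEXP_SUBSTR's optional
# arguments are positional, so a non-default trailing argument forces every preceding
# default to be spelled out. For instance, an exp.RegexpExtract with group=1 is
# generated as something like REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1), whereas
# group=0 (the default) lets all trailing arguments be omitted.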


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }
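
        # Hedged round-trip example for the FUNCTIONS table above (argument order is
        # the interesting part): Snowflake's ARRAY_CONTAINS takes the needle first, so
        # parsing swaps it into the canonical exp.ArrayContains(this=<array>,
        # expression=<value>), and the generator TRANSFORMS below swap it back, e.g.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT ARRAY_CONTAINS(1, [1, 2])", read="snowflake")[0]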

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
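
        # Illustrative: DATE_PART with an EPOCH_* part becomes a unix-time expression
        # scaled to the requested precision, e.g. DATE_PART(epoch_millisecond, ts)
        # parses into roughly exp.TimeToUnix(CAST(ts AS TIMESTAMP)) * 1000, while
        # ordinary parts such as DATE_PART(year, ts) become exp.Extract.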

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table
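
        # Sketch of the staged-file support above (stage and format names are made up):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT * FROM @mystage (FILE_FORMAT => 'my_csv', PATTERN => '.*[.]csv')",
        #     ...     read="snowflake",
        #     ... )
        #
        # should yield an exp.Table whose `this` is the @mystage location, with the
        # format and pattern stored in the table's args.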

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )
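
        # Illustrative: the SHOW machinery above turns, say,
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SHOW TERSE TABLES IN DATABASE mydb LIMIT 10", read="snowflake")
        #
        # into an exp.Show with terse=True, this="TABLES", scope_kind="DATABASE" and the
        # parsed scope and limit, which show_sql in the Generator prints back out.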

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
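
    # A few tokenizer-level consequences worth noting (hedged, version-dependent):
    # MINUS and EXCLUDE both tokenize as EXCEPT, so `SELECT * EXCLUDE (col)` round-trips
    # through the STAR_EXCEPT = "EXCLUDE" setting in the Generator below, and $$ ... $$
    # delimits raw strings, e.g. sqlglot.parse_one("SELECT $$a'b$$", read="snowflake").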

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }
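
        # Hedged example of these transforms in action: reading another dialect and
        # writing Snowflake routes through the table above, e.g.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT IF(x > 1, 'a', 'b')", read="mysql", write="snowflake")[0]
        #
        # should emit IFF(x > 1, 'a', 'b'); exact output depends on the sqlglot version.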

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
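
        # Illustrative: unnest_sql above rewrites a BigQuery/DuckDB-style UNNEST into
        # Snowflake's table function, shaped roughly like
        #     TABLE(FLATTEN(INPUT => <array>)) AS _u(seq, key, path, index, value, this)
        # so the aliased `value` column lines up with UNNEST's single output column.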

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)

            return super().select_sql(expression)
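
# Endnote (illustrative, not part of the module): select_sql above exists because
# Snowflake does not accept OFFSET without a LIMIT; injecting `LIMIT NULL` keeps such
# queries valid, e.g.
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT x FROM t OFFSET 10", read="duckdb", write="snowflake")[0]
#
# should yield something like 'SELECT x FROM t LIMIT NULL OFFSET 10'.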
820 scope_kind = self._prev.text.upper() 821 if self._curr: 822 scope = self._parse_table_parts() 823 elif self._curr: 824 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 825 scope = self._parse_table_parts() 826 827 return self.expression( 828 exp.Show, 829 **{ 830 "terse": terse, 831 "this": this, 832 "history": history, 833 "like": like, 834 "scope": scope, 835 "scope_kind": scope_kind, 836 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 837 "limit": self._parse_limit(), 838 "from": self._parse_string() if self._match(TokenType.FROM) else None, 839 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 840 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 841 }, 842 ) 843 844 def _parse_put(self) -> exp.Put | exp.Command: 845 if self._curr.token_type != TokenType.STRING: 846 return self._parse_as_command(self._prev) 847 848 return self.expression( 849 exp.Put, 850 this=self._parse_string(), 851 target=self._parse_location_path(), 852 properties=self._parse_properties(), 853 ) 854 855 def _parse_location_property(self) -> exp.LocationProperty: 856 self._match(TokenType.EQ) 857 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 858 859 def _parse_file_location(self) -> t.Optional[exp.Expression]: 860 # Parse either a subquery or a staged file 861 return ( 862 self._parse_select(table=True, parse_subquery_alias=False) 863 if self._match(TokenType.L_PAREN, advance=False) 864 else self._parse_table_parts() 865 ) 866 867 def _parse_location_path(self) -> exp.Var: 868 start = self._curr 869 self._advance_any(ignore_reserved=True) 870 871 # We avoid consuming a comma token because external tables like @foo and @bar 872 # can be joined in a query with a comma separator, as well as closing paren 873 # in case of subqueries 874 while self._is_connected() and not self._match_set( 875 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 876 ): 877 self._advance_any(ignore_reserved=True) 878 879 return exp.var(self._find_sql(start, self._prev)) 880 881 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 882 this = super()._parse_lambda_arg() 883 884 if not this: 885 return this 886 887 typ = self._parse_types() 888 889 if typ: 890 return self.expression(exp.Cast, this=this, to=typ) 891 892 return this 893 894 def _parse_foreign_key(self) -> exp.ForeignKey: 895 # inlineFK, the REFERENCES columns are implied 896 if self._match(TokenType.REFERENCES, advance=False): 897 return self.expression(exp.ForeignKey) 898 899 # outoflineFK, explicitly names the columns 900 return super()._parse_foreign_key() 901 902 class Tokenizer(tokens.Tokenizer): 903 STRING_ESCAPES = ["\\", "'"] 904 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 905 RAW_STRINGS = ["$$"] 906 COMMENTS = ["--", "//", ("/*", "*/")] 907 NESTED_COMMENTS = False 908 909 KEYWORDS = { 910 **tokens.Tokenizer.KEYWORDS, 911 "FILE://": TokenType.URI_START, 912 "BYTEINT": TokenType.INT, 913 "CHAR VARYING": TokenType.VARCHAR, 914 "CHARACTER VARYING": TokenType.VARCHAR, 915 "EXCLUDE": TokenType.EXCEPT, 916 "ILIKE ANY": TokenType.ILIKE_ANY, 917 "LIKE ANY": TokenType.LIKE_ANY, 918 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 919 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 920 "MINUS": TokenType.EXCEPT, 921 "NCHAR VARYING": TokenType.VARCHAR, 922 "PUT": TokenType.PUT, 923 "REMOVE": TokenType.COMMAND, 924 "RM": TokenType.COMMAND, 925 "SAMPLE": TokenType.TABLE_SAMPLE, 926 "SQL_DOUBLE": TokenType.DOUBLE, 927 "SQL_VARCHAR": TokenType.VARCHAR, 
928 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 929 "TAG": TokenType.TAG, 930 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 931 "TOP": TokenType.TOP, 932 "WAREHOUSE": TokenType.WAREHOUSE, 933 "STREAMLIT": TokenType.STREAMLIT, 934 } 935 KEYWORDS.pop("/*+") 936 937 SINGLE_TOKENS = { 938 **tokens.Tokenizer.SINGLE_TOKENS, 939 "$": TokenType.PARAMETER, 940 } 941 942 VAR_SINGLE_TOKENS = {"$"} 943 944 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 945 946 class Generator(generator.Generator): 947 PARAMETER_TOKEN = "$" 948 MATCHED_BY_SOURCE = False 949 SINGLE_STRING_INTERVAL = True 950 JOIN_HINTS = False 951 TABLE_HINTS = False 952 QUERY_HINTS = False 953 AGGREGATE_FILTER_SUPPORTED = False 954 SUPPORTS_TABLE_COPY = False 955 COLLATE_IS_FUNC = True 956 LIMIT_ONLY_LITERALS = True 957 JSON_KEY_VALUE_PAIR_SEP = "," 958 INSERT_OVERWRITE = " OVERWRITE INTO" 959 STRUCT_DELIMITER = ("(", ")") 960 COPY_PARAMS_ARE_WRAPPED = False 961 COPY_PARAMS_EQ_REQUIRED = True 962 STAR_EXCEPT = "EXCLUDE" 963 SUPPORTS_EXPLODING_PROJECTIONS = False 964 ARRAY_CONCAT_IS_VAR_LEN = False 965 SUPPORTS_CONVERT_TIMEZONE = True 966 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 967 SUPPORTS_MEDIAN = True 968 ARRAY_SIZE_NAME = "ARRAY_SIZE" 969 970 TRANSFORMS = { 971 **generator.Generator.TRANSFORMS, 972 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 973 exp.ArgMax: rename_func("MAX_BY"), 974 exp.ArgMin: rename_func("MIN_BY"), 975 exp.Array: inline_array_sql, 976 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 977 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 978 exp.AtTimeZone: lambda self, e: self.func( 979 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 980 ), 981 exp.BitwiseOr: rename_func("BITOR"), 982 exp.BitwiseXor: rename_func("BITXOR"), 983 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 984 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 985 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 986 exp.DateAdd: date_delta_sql("DATEADD"), 987 exp.DateDiff: date_delta_sql("DATEDIFF"), 988 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 989 exp.DatetimeDiff: timestampdiff_sql, 990 exp.DateStrToDate: datestrtodate_sql, 991 exp.DayOfMonth: rename_func("DAYOFMONTH"), 992 exp.DayOfWeek: rename_func("DAYOFWEEK"), 993 exp.DayOfYear: rename_func("DAYOFYEAR"), 994 exp.Explode: rename_func("FLATTEN"), 995 exp.Extract: rename_func("DATE_PART"), 996 exp.FromTimeZone: lambda self, e: self.func( 997 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 998 ), 999 exp.GenerateSeries: lambda self, e: self.func( 1000 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1001 ), 1002 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1003 exp.If: if_sql(name="IFF", false_value="NULL"), 1004 exp.JSONExtractArray: _json_extract_value_array_sql, 1005 exp.JSONExtractScalar: lambda self, e: self.func( 1006 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1007 ), 1008 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1009 exp.JSONPathRoot: lambda *_: "", 1010 exp.JSONValueArray: _json_extract_value_array_sql, 1011 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1012 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1013 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1014 exp.MakeInterval: no_make_interval_sql, 1015 exp.Max: max_or_greatest, 1016 exp.Min: min_or_least, 1017 exp.ParseJSON: lambda self, e: self.func( 1018 "TRY_PARSE_JSON" if 
e.args.get("safe") else "PARSE_JSON", e.this 1019 ), 1020 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1021 exp.PercentileCont: transforms.preprocess( 1022 [transforms.add_within_group_for_percentiles] 1023 ), 1024 exp.PercentileDisc: transforms.preprocess( 1025 [transforms.add_within_group_for_percentiles] 1026 ), 1027 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1028 exp.RegexpExtract: _regexpextract_sql, 1029 exp.RegexpExtractAll: _regexpextract_sql, 1030 exp.RegexpILike: _regexpilike_sql, 1031 exp.Rand: rename_func("RANDOM"), 1032 exp.Select: transforms.preprocess( 1033 [ 1034 transforms.eliminate_distinct_on, 1035 transforms.explode_projection_to_unnest(), 1036 transforms.eliminate_semi_and_anti_joins, 1037 _transform_generate_date_array, 1038 ] 1039 ), 1040 exp.SHA: rename_func("SHA1"), 1041 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1042 exp.StartsWith: rename_func("STARTSWITH"), 1043 exp.StrPosition: lambda self, e: strposition_sql( 1044 self, e, func_name="CHARINDEX", supports_position=True 1045 ), 1046 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1047 exp.Stuff: rename_func("INSERT"), 1048 exp.TimeAdd: date_delta_sql("TIMEADD"), 1049 exp.Timestamp: no_timestamp_sql, 1050 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1051 exp.TimestampDiff: lambda self, e: self.func( 1052 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1053 ), 1054 exp.TimestampTrunc: timestamptrunc_sql(), 1055 exp.TimeStrToTime: timestrtotime_sql, 1056 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1057 exp.ToArray: rename_func("TO_ARRAY"), 1058 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1059 exp.ToDouble: rename_func("TO_DOUBLE"), 1060 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1061 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1062 exp.TsOrDsToDate: lambda self, e: self.func( 1063 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1064 ), 1065 exp.TsOrDsToTime: lambda self, e: self.func( 1066 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1067 ), 1068 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1069 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1070 exp.Uuid: rename_func("UUID_STRING"), 1071 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1072 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1073 exp.Xor: rename_func("BOOLXOR"), 1074 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1075 rename_func("EDITDISTANCE") 1076 ), 1077 } 1078 1079 SUPPORTED_JSON_PATH_PARTS = { 1080 exp.JSONPathKey, 1081 exp.JSONPathRoot, 1082 exp.JSONPathSubscript, 1083 } 1084 1085 TYPE_MAPPING = { 1086 **generator.Generator.TYPE_MAPPING, 1087 exp.DataType.Type.NESTED: "OBJECT", 1088 exp.DataType.Type.STRUCT: "OBJECT", 1089 } 1090 1091 TOKEN_MAPPING = { 1092 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1093 } 1094 1095 PROPERTIES_LOCATION = { 1096 **generator.Generator.PROPERTIES_LOCATION, 1097 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1098 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1099 } 1100 1101 UNSUPPORTED_VALUES_EXPRESSIONS = { 1102 exp.Map, 1103 exp.StarMap, 1104 exp.Struct, 1105 exp.VarMap, 1106 } 1107 1108 def with_properties(self, properties: exp.Properties) -> str: 1109 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1110 1111 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1112 if 
expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1113 values_as_table = False 1114 1115 return super().values_sql(expression, values_as_table=values_as_table) 1116 1117 def datatype_sql(self, expression: exp.DataType) -> str: 1118 expressions = expression.expressions 1119 if ( 1120 expressions 1121 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1122 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1123 ): 1124 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1125 return "OBJECT" 1126 1127 return super().datatype_sql(expression) 1128 1129 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1130 return self.func( 1131 "TO_NUMBER", 1132 expression.this, 1133 expression.args.get("format"), 1134 expression.args.get("precision"), 1135 expression.args.get("scale"), 1136 ) 1137 1138 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1139 milli = expression.args.get("milli") 1140 if milli is not None: 1141 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1142 expression.set("nano", milli_to_nano) 1143 1144 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1145 1146 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1147 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1148 return self.func("TO_GEOGRAPHY", expression.this) 1149 if expression.is_type(exp.DataType.Type.GEOMETRY): 1150 return self.func("TO_GEOMETRY", expression.this) 1151 1152 return super().cast_sql(expression, safe_prefix=safe_prefix) 1153 1154 def trycast_sql(self, expression: exp.TryCast) -> str: 1155 value = expression.this 1156 1157 if value.type is None: 1158 from sqlglot.optimizer.annotate_types import annotate_types 1159 1160 value = annotate_types(value) 1161 1162 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1163 return super().trycast_sql(expression) 1164 1165 # TRY_CAST only works for string values in Snowflake 1166 return self.cast_sql(expression) 1167 1168 def log_sql(self, expression: exp.Log) -> str: 1169 if not expression.expression: 1170 return self.func("LN", expression.this) 1171 1172 return super().log_sql(expression) 1173 1174 def unnest_sql(self, expression: exp.Unnest) -> str: 1175 unnest_alias = expression.args.get("alias") 1176 offset = expression.args.get("offset") 1177 1178 columns = [ 1179 exp.to_identifier("seq"), 1180 exp.to_identifier("key"), 1181 exp.to_identifier("path"), 1182 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1183 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1184 or exp.to_identifier("value"), 1185 exp.to_identifier("this"), 1186 ] 1187 1188 if unnest_alias: 1189 unnest_alias.set("columns", columns) 1190 else: 1191 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1192 1193 table_input = self.sql(expression.expressions[0]) 1194 if not table_input.startswith("INPUT =>"): 1195 table_input = f"INPUT => {table_input}" 1196 1197 explode = f"TABLE(FLATTEN({table_input}))" 1198 alias = self.sql(unnest_alias) 1199 alias = f" AS {alias}" if alias else "" 1200 return f"{explode}{alias}" 1201 1202 def show_sql(self, expression: exp.Show) -> str: 1203 terse = "TERSE " if expression.args.get("terse") else "" 1204 history = " HISTORY" if expression.args.get("history") else "" 1205 like = self.sql(expression, "like") 1206 like = f" LIKE {like}" if like else "" 1207 1208 scope = self.sql(expression, "scope") 1209 scope = f" {scope}" if scope else "" 1210 1211 scope_kind = 
self.sql(expression, "scope_kind") 1212 if scope_kind: 1213 scope_kind = f" IN {scope_kind}" 1214 1215 starts_with = self.sql(expression, "starts_with") 1216 if starts_with: 1217 starts_with = f" STARTS WITH {starts_with}" 1218 1219 limit = self.sql(expression, "limit") 1220 1221 from_ = self.sql(expression, "from") 1222 if from_: 1223 from_ = f" FROM {from_}" 1224 1225 privileges = self.expressions(expression, key="privileges", flat=True) 1226 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1227 1228 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1229 1230 def describe_sql(self, expression: exp.Describe) -> str: 1231 # Default to table if kind is unknown 1232 kind_value = expression.args.get("kind") or "TABLE" 1233 kind = f" {kind_value}" if kind_value else "" 1234 this = f" {self.sql(expression, 'this')}" 1235 expressions = self.expressions(expression, flat=True) 1236 expressions = f" {expressions}" if expressions else "" 1237 return f"DESCRIBE{kind}{this}{expressions}" 1238 1239 def generatedasidentitycolumnconstraint_sql( 1240 self, expression: exp.GeneratedAsIdentityColumnConstraint 1241 ) -> str: 1242 start = expression.args.get("start") 1243 start = f" START {start}" if start else "" 1244 increment = expression.args.get("increment") 1245 increment = f" INCREMENT {increment}" if increment else "" 1246 return f"AUTOINCREMENT{start}{increment}" 1247 1248 def cluster_sql(self, expression: exp.Cluster) -> str: 1249 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1250 1251 def struct_sql(self, expression: exp.Struct) -> str: 1252 keys = [] 1253 values = [] 1254 1255 for i, e in enumerate(expression.expressions): 1256 if isinstance(e, exp.PropertyEQ): 1257 keys.append( 1258 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1259 ) 1260 values.append(e.expression) 1261 else: 1262 keys.append(exp.Literal.string(f"_{i}")) 1263 values.append(e) 1264 1265 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1266 1267 @unsupported_args("weight", "accuracy") 1268 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1269 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1270 1271 def alterset_sql(self, expression: exp.AlterSet) -> str: 1272 exprs = self.expressions(expression, flat=True) 1273 exprs = f" {exprs}" if exprs else "" 1274 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1275 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1276 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1277 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1278 tag = self.expressions(expression, key="tag", flat=True) 1279 tag = f" TAG {tag}" if tag else "" 1280 1281 return f"SET{exprs}{file_format}{copy_options}{tag}" 1282 1283 def strtotime_sql(self, expression: exp.StrToTime): 1284 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1285 return self.func( 1286 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1287 ) 1288 1289 def timestampsub_sql(self, expression: exp.TimestampSub): 1290 return self.sql( 1291 exp.TimestampAdd( 1292 this=expression.this, 1293 expression=expression.expression * -1, 1294 unit=expression.unit, 1295 ) 1296 ) 1297 1298 def jsonextract_sql(self, expression: exp.JSONExtract): 1299 this = expression.this 1300 1301 # JSON strings are 
valid coming from other dialects such as BQ 1302 return self.func( 1303 "GET_PATH", 1304 exp.ParseJSON(this=this) if this.is_string else this, 1305 expression.expression, 1306 ) 1307 1308 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1309 this = expression.this 1310 if not isinstance(this, exp.TsOrDsToTimestamp): 1311 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1312 1313 return self.func("TO_CHAR", this, self.format_time(expression)) 1314 1315 def datesub_sql(self, expression: exp.DateSub) -> str: 1316 value = expression.expression 1317 if value: 1318 value.replace(value * (-1)) 1319 else: 1320 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1321 1322 return date_delta_sql("DATEADD")(self, expression) 1323 1324 def select_sql(self, expression: exp.Select) -> str: 1325 limit = expression.args.get("limit") 1326 offset = expression.args.get("offset") 1327 if offset and not limit: 1328 expression.limit(exp.Null(), copy=False) 1329 return super().select_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
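Snowflake folds unquoted identifiers to uppercase. A minimal sketch of this strategy in action (table and column names here are made up), using sqlglot's normalize_identifiers helper:

    from sqlglot import parse_one
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    # Unquoted identifiers fold to uppercase under Snowflake's strategy,
    # while quoted identifiers keep their original casing.
    ast = parse_one('SELECT a, "b" FROM t', read="snowflake")
    print(normalize_identifiers(ast, dialect="snowflake").sql("snowflake"))
    # roughly: SELECT A, "b" FROM T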
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
      SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
      SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
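A minimal sketch of exercising this flag through sqlglot's qualify pass, which is where column resolution happens (exact output formatting may differ by version):

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    # Under Snowflake, the HAVING's `c` resolves to the CTE alias column, so
    # the inner projection SUM(a) is re-aliased as c rather than left unnamed.
    print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))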
Associates this dialect's time formats with their equivalent Python strftime formats.
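For instance, a Snowflake format string is translated through this mapping when transpiling to another engine (the duckdb target below is just an illustrative choice):

    import sqlglot

    # Snowflake's 'yyyy-mm-dd' maps to the strftime-style '%Y-%m-%d' on the way out.
    print(sqlglot.transpile("SELECT TO_CHAR(ts, 'yyyy-mm-dd')", read="snowflake", write="duckdb")[0])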
376 def quote_identifier(self, expression: E, identify: bool = True) -> E: 377 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 378 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 379 if ( 380 isinstance(expression, exp.Identifier) 381 and isinstance(expression.parent, exp.Table) 382 and expression.name.lower() == "dual" 383 ): 384 return expression # type: ignore 385 386 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
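A short usage sketch of the method above (identifier names are arbitrary):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    # A regular identifier is quoted when identify=True (the default):
    print(dialect.quote_identifier(exp.to_identifier("foo")).sql("snowflake"))  # "foo"
    # DUAL inside a table reference is left unquoted, per the special case above:
    table = exp.to_table("dual")
    print(dialect.quote_identifier(table.this).sql("snowflake"))  # dual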
Mapping of an escaped sequence (such as the two-character sequence \n) to its unescaped version (the corresponding literal newline character).
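As a related illustration (a sketch), Snowflake's tokenizer settings shown later on this page treat both a backslash and a doubled single quote as string escapes:

    import sqlglot

    ast = sqlglot.parse_one(r"SELECT 'it\'s', 'it''s'", read="snowflake")
    # Both literals unescape to the same text: it's
    print([lit.this for lit in ast.find_all(sqlglot.exp.Literal)])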
388 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 389 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 390 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
392 class Parser(parser.Parser): 393 IDENTIFY_PIVOT_STRINGS = True 394 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 395 COLON_IS_VARIANT_EXTRACT = True 396 397 ID_VAR_TOKENS = { 398 *parser.Parser.ID_VAR_TOKENS, 399 TokenType.MATCH_CONDITION, 400 } 401 402 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 403 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 404 405 FUNCTIONS = { 406 **parser.Parser.FUNCTIONS, 407 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 408 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 409 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 410 this=seq_get(args, 1), expression=seq_get(args, 0) 411 ), 412 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 413 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 414 start=seq_get(args, 0), 415 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 416 step=seq_get(args, 2), 417 ), 418 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 419 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 420 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 421 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 422 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 423 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 424 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 425 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 426 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 427 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 428 "DATE_TRUNC": _date_trunc_to_time, 429 "DATEADD": _build_date_time_add(exp.DateAdd), 430 "DATEDIFF": _build_datediff, 431 "DIV0": _build_if_from_div0, 432 "EDITDISTANCE": lambda args: exp.Levenshtein( 433 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 434 ), 435 "FLATTEN": exp.Explode.from_arg_list, 436 "GET_PATH": lambda args, dialect: exp.JSONExtract( 437 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 438 ), 439 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 440 "IFF": exp.If.from_arg_list, 441 "LAST_DAY": lambda args: exp.LastDay( 442 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 443 ), 444 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 445 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 446 "NULLIFZERO": _build_if_from_nullifzero, 447 "OBJECT_CONSTRUCT": _build_object_construct, 448 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 449 "REGEXP_REPLACE": _build_regexp_replace, 450 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 451 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 452 "RLIKE": exp.RegexpLike.from_arg_list, 453 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 454 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 455 "TIMEADD": _build_date_time_add(exp.TimeAdd), 456 "TIMEDIFF": _build_datediff, 457 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 458 "TIMESTAMPDIFF": _build_datediff, 459 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 460 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 461 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 462 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 463 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 464 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 465 "TRY_TO_TIME": 
_build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 466 "TRY_TO_TIMESTAMP": _build_datetime( 467 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 468 ), 469 "TO_CHAR": build_timetostr_or_tochar, 470 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 471 "TO_NUMBER": lambda args: exp.ToNumber( 472 this=seq_get(args, 0), 473 format=seq_get(args, 1), 474 precision=seq_get(args, 2), 475 scale=seq_get(args, 3), 476 ), 477 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 478 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 479 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 480 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 481 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 482 "TO_VARCHAR": exp.ToChar.from_arg_list, 483 "ZEROIFNULL": _build_if_from_zeroifnull, 484 } 485 486 FUNCTION_PARSERS = { 487 **parser.Parser.FUNCTION_PARSERS, 488 "DATE_PART": lambda self: self._parse_date_part(), 489 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 490 "LISTAGG": lambda self: self._parse_string_agg(), 491 } 492 FUNCTION_PARSERS.pop("TRIM") 493 494 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 495 496 RANGE_PARSERS = { 497 **parser.Parser.RANGE_PARSERS, 498 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 499 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 500 } 501 502 ALTER_PARSERS = { 503 **parser.Parser.ALTER_PARSERS, 504 "UNSET": lambda self: self.expression( 505 exp.Set, 506 tag=self._match_text_seq("TAG"), 507 expressions=self._parse_csv(self._parse_id_var), 508 unset=True, 509 ), 510 } 511 512 STATEMENT_PARSERS = { 513 **parser.Parser.STATEMENT_PARSERS, 514 TokenType.PUT: lambda self: self._parse_put(), 515 TokenType.SHOW: lambda self: self._parse_show(), 516 } 517 518 PROPERTY_PARSERS = { 519 **parser.Parser.PROPERTY_PARSERS, 520 "LOCATION": lambda self: self._parse_location_property(), 521 "TAG": lambda self: self._parse_tag(), 522 "USING": lambda self: self._match_text_seq("TEMPLATE") 523 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 524 } 525 526 TYPE_CONVERTERS = { 527 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 528 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 529 } 530 531 SHOW_PARSERS = { 532 "DATABASES": _show_parser("DATABASES"), 533 "TERSE DATABASES": _show_parser("DATABASES"), 534 "SCHEMAS": _show_parser("SCHEMAS"), 535 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 536 "OBJECTS": _show_parser("OBJECTS"), 537 "TERSE OBJECTS": _show_parser("OBJECTS"), 538 "TABLES": _show_parser("TABLES"), 539 "TERSE TABLES": _show_parser("TABLES"), 540 "VIEWS": _show_parser("VIEWS"), 541 "TERSE VIEWS": _show_parser("VIEWS"), 542 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 543 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 544 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 545 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 546 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 547 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 548 "SEQUENCES": _show_parser("SEQUENCES"), 549 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 550 "COLUMNS": _show_parser("COLUMNS"), 551 "USERS": _show_parser("USERS"), 552 "TERSE USERS": _show_parser("USERS"), 553 "FUNCTIONS": _show_parser("FUNCTIONS"), 554 "PROCEDURES": _show_parser("PROCEDURES"), 555 "WAREHOUSES": 
_show_parser("WAREHOUSES"), 556 } 557 558 CONSTRAINT_PARSERS = { 559 **parser.Parser.CONSTRAINT_PARSERS, 560 "WITH": lambda self: self._parse_with_constraint(), 561 "MASKING": lambda self: self._parse_with_constraint(), 562 "PROJECTION": lambda self: self._parse_with_constraint(), 563 "TAG": lambda self: self._parse_with_constraint(), 564 } 565 566 STAGED_FILE_SINGLE_TOKENS = { 567 TokenType.DOT, 568 TokenType.MOD, 569 TokenType.SLASH, 570 } 571 572 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 573 574 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 575 576 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 577 578 LAMBDAS = { 579 **parser.Parser.LAMBDAS, 580 TokenType.ARROW: lambda self, expressions: self.expression( 581 exp.Lambda, 582 this=self._replace_lambda( 583 self._parse_assignment(), 584 expressions, 585 ), 586 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 587 ), 588 } 589 590 def _parse_use(self) -> exp.Use: 591 if self._match_text_seq("SECONDARY", "ROLES"): 592 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 593 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 594 return self.expression( 595 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 596 ) 597 598 return super()._parse_use() 599 600 def _negate_range( 601 self, this: t.Optional[exp.Expression] = None 602 ) -> t.Optional[exp.Expression]: 603 if not this: 604 return this 605 606 query = this.args.get("query") 607 if isinstance(this, exp.In) and isinstance(query, exp.Query): 608 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 609 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 610 # which can produce different results (most likely a SnowFlake bug). 
611 # 612 # https://docs.snowflake.com/en/sql-reference/functions/in 613 # Context: https://github.com/tobymao/sqlglot/issues/3890 614 return self.expression( 615 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 616 ) 617 618 return self.expression(exp.Not, this=this) 619 620 def _parse_tag(self) -> exp.Tags: 621 return self.expression( 622 exp.Tags, 623 expressions=self._parse_wrapped_csv(self._parse_property), 624 ) 625 626 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 627 if self._prev.token_type != TokenType.WITH: 628 self._retreat(self._index - 1) 629 630 if self._match_text_seq("MASKING", "POLICY"): 631 policy = self._parse_column() 632 return self.expression( 633 exp.MaskingPolicyColumnConstraint, 634 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 635 expressions=self._match(TokenType.USING) 636 and self._parse_wrapped_csv(self._parse_id_var), 637 ) 638 if self._match_text_seq("PROJECTION", "POLICY"): 639 policy = self._parse_column() 640 return self.expression( 641 exp.ProjectionPolicyColumnConstraint, 642 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 643 ) 644 if self._match(TokenType.TAG): 645 return self._parse_tag() 646 647 return None 648 649 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 650 if self._match(TokenType.TAG): 651 return self._parse_tag() 652 653 return super()._parse_with_property() 654 655 def _parse_create(self) -> exp.Create | exp.Command: 656 expression = super()._parse_create() 657 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 658 # Replace the Table node with the enclosed Identifier 659 expression.this.replace(expression.this.this) 660 661 return expression 662 663 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 664 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 665 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 666 this = self._parse_var() or self._parse_type() 667 668 if not this: 669 return None 670 671 self._match(TokenType.COMMA) 672 expression = self._parse_bitwise() 673 this = map_date_part(this) 674 name = this.name.upper() 675 676 if name.startswith("EPOCH"): 677 if name == "EPOCH_MILLISECOND": 678 scale = 10**3 679 elif name == "EPOCH_MICROSECOND": 680 scale = 10**6 681 elif name == "EPOCH_NANOSECOND": 682 scale = 10**9 683 else: 684 scale = None 685 686 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 687 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 688 689 if scale: 690 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 691 692 return to_unix 693 694 return self.expression(exp.Extract, this=this, expression=expression) 695 696 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 697 if is_map: 698 # Keys are strings in Snowflake's objects, see also: 699 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 700 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 701 return self._parse_slice(self._parse_string()) 702 703 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 704 705 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 706 lateral = super()._parse_lateral() 707 if not lateral: 708 return lateral 709 710 if isinstance(lateral.this, exp.Explode): 711 table_alias = lateral.args.get("alias") 
712 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 713 if table_alias and not table_alias.args.get("columns"): 714 table_alias.set("columns", columns) 715 elif not table_alias: 716 exp.alias_(lateral, "_flattened", table=columns, copy=False) 717 718 return lateral 719 720 def _parse_table_parts( 721 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 722 ) -> exp.Table: 723 # https://docs.snowflake.com/en/user-guide/querying-stage 724 if self._match(TokenType.STRING, advance=False): 725 table = self._parse_string() 726 elif self._match_text_seq("@", advance=False): 727 table = self._parse_location_path() 728 else: 729 table = None 730 731 if table: 732 file_format = None 733 pattern = None 734 735 wrapped = self._match(TokenType.L_PAREN) 736 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 737 if self._match_text_seq("FILE_FORMAT", "=>"): 738 file_format = self._parse_string() or super()._parse_table_parts( 739 is_db_reference=is_db_reference 740 ) 741 elif self._match_text_seq("PATTERN", "=>"): 742 pattern = self._parse_string() 743 else: 744 break 745 746 self._match(TokenType.COMMA) 747 748 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 749 else: 750 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 751 752 return table 753 754 def _parse_table( 755 self, 756 schema: bool = False, 757 joins: bool = False, 758 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 759 parse_bracket: bool = False, 760 is_db_reference: bool = False, 761 parse_partition: bool = False, 762 ) -> t.Optional[exp.Expression]: 763 table = super()._parse_table( 764 schema=schema, 765 joins=joins, 766 alias_tokens=alias_tokens, 767 parse_bracket=parse_bracket, 768 is_db_reference=is_db_reference, 769 parse_partition=parse_partition, 770 ) 771 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 772 table_from_rows = table.this 773 for arg in exp.TableFromRows.arg_types: 774 if arg != "this": 775 table_from_rows.set(arg, table.args.get(arg)) 776 777 table = table_from_rows 778 779 return table 780 781 def _parse_id_var( 782 self, 783 any_token: bool = True, 784 tokens: t.Optional[t.Collection[TokenType]] = None, 785 ) -> t.Optional[exp.Expression]: 786 if self._match_text_seq("IDENTIFIER", "("): 787 identifier = ( 788 super()._parse_id_var(any_token=any_token, tokens=tokens) 789 or self._parse_string() 790 ) 791 self._match_r_paren() 792 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 793 794 return super()._parse_id_var(any_token=any_token, tokens=tokens) 795 796 def _parse_show_snowflake(self, this: str) -> exp.Show: 797 scope = None 798 scope_kind = None 799 800 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 801 # which is syntactically valid but has no effect on the output 802 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 803 804 history = self._match_text_seq("HISTORY") 805 806 like = self._parse_string() if self._match(TokenType.LIKE) else None 807 808 if self._match(TokenType.IN): 809 if self._match_text_seq("ACCOUNT"): 810 scope_kind = "ACCOUNT" 811 elif self._match_text_seq("CLASS"): 812 scope_kind = "CLASS" 813 scope = self._parse_table_parts() 814 elif self._match_text_seq("APPLICATION"): 815 scope_kind = "APPLICATION" 816 if self._match_text_seq("PACKAGE"): 817 scope_kind += " PACKAGE" 818 scope = self._parse_table_parts() 819 elif self._match_set(self.DB_CREATABLES): 
820 scope_kind = self._prev.text.upper() 821 if self._curr: 822 scope = self._parse_table_parts() 823 elif self._curr: 824 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 825 scope = self._parse_table_parts() 826 827 return self.expression( 828 exp.Show, 829 **{ 830 "terse": terse, 831 "this": this, 832 "history": history, 833 "like": like, 834 "scope": scope, 835 "scope_kind": scope_kind, 836 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 837 "limit": self._parse_limit(), 838 "from": self._parse_string() if self._match(TokenType.FROM) else None, 839 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 840 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 841 }, 842 ) 843 844 def _parse_put(self) -> exp.Put | exp.Command: 845 if self._curr.token_type != TokenType.STRING: 846 return self._parse_as_command(self._prev) 847 848 return self.expression( 849 exp.Put, 850 this=self._parse_string(), 851 target=self._parse_location_path(), 852 properties=self._parse_properties(), 853 ) 854 855 def _parse_location_property(self) -> exp.LocationProperty: 856 self._match(TokenType.EQ) 857 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 858 859 def _parse_file_location(self) -> t.Optional[exp.Expression]: 860 # Parse either a subquery or a staged file 861 return ( 862 self._parse_select(table=True, parse_subquery_alias=False) 863 if self._match(TokenType.L_PAREN, advance=False) 864 else self._parse_table_parts() 865 ) 866 867 def _parse_location_path(self) -> exp.Var: 868 start = self._curr 869 self._advance_any(ignore_reserved=True) 870 871 # We avoid consuming a comma token because external tables like @foo and @bar 872 # can be joined in a query with a comma separator, as well as closing paren 873 # in case of subqueries 874 while self._is_connected() and not self._match_set( 875 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 876 ): 877 self._advance_any(ignore_reserved=True) 878 879 return exp.var(self._find_sql(start, self._prev)) 880 881 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 882 this = super()._parse_lambda_arg() 883 884 if not this: 885 return this 886 887 typ = self._parse_types() 888 889 if typ: 890 return self.expression(exp.Cast, this=this, to=typ) 891 892 return this 893 894 def _parse_foreign_key(self) -> exp.ForeignKey: 895 # inlineFK, the REFERENCES columns are implied 896 if self._match(TokenType.REFERENCES, advance=False): 897 return self.expression(exp.ForeignKey) 898 899 # outoflineFK, explicitly names the columns 900 return super()._parse_foreign_key()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
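For example (a small sketch), Snowflake-specific functions are parsed by this class into dialect-agnostic nodes via the top-level API:

    import sqlglot
    from sqlglot import exp

    # IFF(...) is registered in FUNCTIONS above as exp.If.
    ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake")
    print(isinstance(ast.selects[0], exp.If))  # True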
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
902 class Tokenizer(tokens.Tokenizer): 903 STRING_ESCAPES = ["\\", "'"] 904 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 905 RAW_STRINGS = ["$$"] 906 COMMENTS = ["--", "//", ("/*", "*/")] 907 NESTED_COMMENTS = False 908 909 KEYWORDS = { 910 **tokens.Tokenizer.KEYWORDS, 911 "FILE://": TokenType.URI_START, 912 "BYTEINT": TokenType.INT, 913 "CHAR VARYING": TokenType.VARCHAR, 914 "CHARACTER VARYING": TokenType.VARCHAR, 915 "EXCLUDE": TokenType.EXCEPT, 916 "ILIKE ANY": TokenType.ILIKE_ANY, 917 "LIKE ANY": TokenType.LIKE_ANY, 918 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 919 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 920 "MINUS": TokenType.EXCEPT, 921 "NCHAR VARYING": TokenType.VARCHAR, 922 "PUT": TokenType.PUT, 923 "REMOVE": TokenType.COMMAND, 924 "RM": TokenType.COMMAND, 925 "SAMPLE": TokenType.TABLE_SAMPLE, 926 "SQL_DOUBLE": TokenType.DOUBLE, 927 "SQL_VARCHAR": TokenType.VARCHAR, 928 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 929 "TAG": TokenType.TAG, 930 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 931 "TOP": TokenType.TOP, 932 "WAREHOUSE": TokenType.WAREHOUSE, 933 "STREAMLIT": TokenType.STREAMLIT, 934 } 935 KEYWORDS.pop("/*+") 936 937 SINGLE_TOKENS = { 938 **tokens.Tokenizer.SINGLE_TOKENS, 939 "$": TokenType.PARAMETER, 940 } 941 942 VAR_SINGLE_TOKENS = {"$"} 943 944 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
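A brief sketch exercising two of the tokenizer settings above, $$...$$ raw strings and // line comments (exact round-trip formatting may vary by version):

    import sqlglot

    # $$...$$ is a raw string, so backslashes are kept verbatim; // starts a comment.
    ast = sqlglot.parse_one("SELECT $$C:\\new\\path$$ // note", read="snowflake")
    print(ast.find(sqlglot.exp.Literal).this)  # C:\new\path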
946 class Generator(generator.Generator): 947 PARAMETER_TOKEN = "$" 948 MATCHED_BY_SOURCE = False 949 SINGLE_STRING_INTERVAL = True 950 JOIN_HINTS = False 951 TABLE_HINTS = False 952 QUERY_HINTS = False 953 AGGREGATE_FILTER_SUPPORTED = False 954 SUPPORTS_TABLE_COPY = False 955 COLLATE_IS_FUNC = True 956 LIMIT_ONLY_LITERALS = True 957 JSON_KEY_VALUE_PAIR_SEP = "," 958 INSERT_OVERWRITE = " OVERWRITE INTO" 959 STRUCT_DELIMITER = ("(", ")") 960 COPY_PARAMS_ARE_WRAPPED = False 961 COPY_PARAMS_EQ_REQUIRED = True 962 STAR_EXCEPT = "EXCLUDE" 963 SUPPORTS_EXPLODING_PROJECTIONS = False 964 ARRAY_CONCAT_IS_VAR_LEN = False 965 SUPPORTS_CONVERT_TIMEZONE = True 966 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 967 SUPPORTS_MEDIAN = True 968 ARRAY_SIZE_NAME = "ARRAY_SIZE" 969 970 TRANSFORMS = { 971 **generator.Generator.TRANSFORMS, 972 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 973 exp.ArgMax: rename_func("MAX_BY"), 974 exp.ArgMin: rename_func("MIN_BY"), 975 exp.Array: inline_array_sql, 976 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 977 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 978 exp.AtTimeZone: lambda self, e: self.func( 979 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 980 ), 981 exp.BitwiseOr: rename_func("BITOR"), 982 exp.BitwiseXor: rename_func("BITXOR"), 983 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 984 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 985 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 986 exp.DateAdd: date_delta_sql("DATEADD"), 987 exp.DateDiff: date_delta_sql("DATEDIFF"), 988 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 989 exp.DatetimeDiff: timestampdiff_sql, 990 exp.DateStrToDate: datestrtodate_sql, 991 exp.DayOfMonth: rename_func("DAYOFMONTH"), 992 exp.DayOfWeek: rename_func("DAYOFWEEK"), 993 exp.DayOfYear: rename_func("DAYOFYEAR"), 994 exp.Explode: rename_func("FLATTEN"), 995 exp.Extract: rename_func("DATE_PART"), 996 exp.FromTimeZone: lambda self, e: self.func( 997 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 998 ), 999 exp.GenerateSeries: lambda self, e: self.func( 1000 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1001 ), 1002 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1003 exp.If: if_sql(name="IFF", false_value="NULL"), 1004 exp.JSONExtractArray: _json_extract_value_array_sql, 1005 exp.JSONExtractScalar: lambda self, e: self.func( 1006 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1007 ), 1008 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1009 exp.JSONPathRoot: lambda *_: "", 1010 exp.JSONValueArray: _json_extract_value_array_sql, 1011 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1012 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1013 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1014 exp.MakeInterval: no_make_interval_sql, 1015 exp.Max: max_or_greatest, 1016 exp.Min: min_or_least, 1017 exp.ParseJSON: lambda self, e: self.func( 1018 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1019 ), 1020 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1021 exp.PercentileCont: transforms.preprocess( 1022 [transforms.add_within_group_for_percentiles] 1023 ), 1024 exp.PercentileDisc: transforms.preprocess( 1025 [transforms.add_within_group_for_percentiles] 1026 ), 1027 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1028 exp.RegexpExtract: _regexpextract_sql, 
1029 exp.RegexpExtractAll: _regexpextract_sql, 1030 exp.RegexpILike: _regexpilike_sql, 1031 exp.Rand: rename_func("RANDOM"), 1032 exp.Select: transforms.preprocess( 1033 [ 1034 transforms.eliminate_distinct_on, 1035 transforms.explode_projection_to_unnest(), 1036 transforms.eliminate_semi_and_anti_joins, 1037 _transform_generate_date_array, 1038 ] 1039 ), 1040 exp.SHA: rename_func("SHA1"), 1041 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1042 exp.StartsWith: rename_func("STARTSWITH"), 1043 exp.StrPosition: lambda self, e: strposition_sql( 1044 self, e, func_name="CHARINDEX", supports_position=True 1045 ), 1046 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1047 exp.Stuff: rename_func("INSERT"), 1048 exp.TimeAdd: date_delta_sql("TIMEADD"), 1049 exp.Timestamp: no_timestamp_sql, 1050 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1051 exp.TimestampDiff: lambda self, e: self.func( 1052 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1053 ), 1054 exp.TimestampTrunc: timestamptrunc_sql(), 1055 exp.TimeStrToTime: timestrtotime_sql, 1056 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1057 exp.ToArray: rename_func("TO_ARRAY"), 1058 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1059 exp.ToDouble: rename_func("TO_DOUBLE"), 1060 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1061 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1062 exp.TsOrDsToDate: lambda self, e: self.func( 1063 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1064 ), 1065 exp.TsOrDsToTime: lambda self, e: self.func( 1066 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1067 ), 1068 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1069 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1070 exp.Uuid: rename_func("UUID_STRING"), 1071 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1072 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1073 exp.Xor: rename_func("BOOLXOR"), 1074 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1075 rename_func("EDITDISTANCE") 1076 ), 1077 } 1078 1079 SUPPORTED_JSON_PATH_PARTS = { 1080 exp.JSONPathKey, 1081 exp.JSONPathRoot, 1082 exp.JSONPathSubscript, 1083 } 1084 1085 TYPE_MAPPING = { 1086 **generator.Generator.TYPE_MAPPING, 1087 exp.DataType.Type.NESTED: "OBJECT", 1088 exp.DataType.Type.STRUCT: "OBJECT", 1089 } 1090 1091 TOKEN_MAPPING = { 1092 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1093 } 1094 1095 PROPERTIES_LOCATION = { 1096 **generator.Generator.PROPERTIES_LOCATION, 1097 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1098 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1099 } 1100 1101 UNSUPPORTED_VALUES_EXPRESSIONS = { 1102 exp.Map, 1103 exp.StarMap, 1104 exp.Struct, 1105 exp.VarMap, 1106 } 1107 1108 def with_properties(self, properties: exp.Properties) -> str: 1109 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1110 1111 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1112 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1113 values_as_table = False 1114 1115 return super().values_sql(expression, values_as_table=values_as_table) 1116 1117 def datatype_sql(self, expression: exp.DataType) -> str: 1118 expressions = expression.expressions 1119 if ( 1120 expressions 1121 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1122 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1123 ): 1124 # The correct 
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        explode = f"TABLE(FLATTEN({table_input}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""
        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime):
        safe_prefix = "TRY_" if expression.args.get("safe") else ""
        return self.func(
            f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
        )

    def timestampsub_sql(self, expression: exp.TimestampSub):
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ
        return self.func(
            "GET_PATH",
            exp.ParseJSON(this=this) if this.is_string else this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if not isinstance(this, exp.TsOrDsToTimestamp):
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))
    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
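A minimal usage sketch (not part of the module source): sqlglot.parse_one and Expression.sql are the standard public entry points, and the outputs shown are what the TRANSFORMS table above implies rather than a captured transcript.

    import sqlglot

    # Parse Presto SQL, then let this Generator (selected via dialect="snowflake")
    # produce the Snowflake rendering.
    expression = sqlglot.parse_one("SELECT APPROX_DISTINCT(x) FROM t", read="presto")
    print(expression.sql(dialect="snowflake"))
    # Expected, per the exp.ApproxDistinct entry in TRANSFORMS:
    # SELECT APPROX_COUNT_DISTINCT(x) FROM t

    # pretty=True enables the formatting options documented in the argument list above.
    print(expression.sql(dialect="snowflake", pretty=True))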
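Both TYPE_MAPPING and the datatype_sql override above funnel structured types into Snowflake's OBJECT. A small sketch of the intended effect (outputs are inferred from those mappings, not verified against a run):

    from sqlglot import exp

    # Plain STRUCT is renamed via TYPE_MAPPING.
    print(exp.DataType.build("STRUCT").sql(dialect="snowflake"))       # OBJECT

    # A STRUCT whose fields are bare types (no key names) hits the
    # datatype_sql branch and is collapsed to OBJECT as well.
    print(exp.DataType.build("STRUCT<INT>").sql(dialect="snowflake"))  # OBJECT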
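Per the cast_sql override, casts to geospatial types are emitted as Snowflake's conversion functions instead of CAST expressions. An illustrative sketch, assuming the default dialect parses the GEOGRAPHY and GEOMETRY type names:

    import sqlglot

    print(sqlglot.parse_one("CAST(x AS GEOGRAPHY)").sql(dialect="snowflake"))
    # TO_GEOGRAPHY(x)
    print(sqlglot.parse_one("CAST(x AS GEOMETRY)").sql(dialect="snowflake"))
    # TO_GEOMETRY(x)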
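Because Snowflake's TRY_CAST only accepts string inputs, trycast_sql keeps TRY_CAST when the operand is (or may be) text and otherwise degrades to a plain cast. A sketch, with outputs inferred from the logic above:

    import sqlglot

    # Unknown operand type: it could be a string, so TRY_CAST is preserved.
    print(sqlglot.parse_one("TRY_CAST(x AS INT)").sql(dialect="snowflake"))
    # TRY_CAST(x AS INT)

    # Known non-string operand: falls back to a regular CAST.
    print(sqlglot.parse_one("TRY_CAST(1 AS VARCHAR)").sql(dialect="snowflake"))
    # CAST(1 AS VARCHAR)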
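unnest_sql rewrites UNNEST into Snowflake's TABLE(FLATTEN(INPUT => ...)) form and aliases all six FLATTEN output columns so downstream references keep resolving. Roughly (the exact aliasing may differ slightly by sqlglot version):

    import sqlglot

    print(sqlglot.transpile(
        "SELECT value FROM UNNEST([1, 2])", read="duckdb", write="snowflake"
    )[0])
    # SELECT value FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)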
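struct_sql turns struct literals into OBJECT_CONSTRUCT calls, falling back to positional _0, _1, ... keys when a field has no name. For example (output inferred from the method above):

    import sqlglot

    print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="snowflake")[0])
    # SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)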
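Finally, select_sql pads an OFFSET-only query with LIMIT NULL, presumably because Snowflake does not accept OFFSET without a LIMIT clause. A sketch (output inferred, not a verified transcript):

    import sqlglot

    print(sqlglot.parse_one("SELECT x FROM t OFFSET 5").sql(dialect="snowflake"))
    # SELECT x FROM t LIMIT NULL OFFSET 5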
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- put_sql