sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder

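# Illustrative sketch (added for exposition) of what _build_datetime produces;
# outputs are indicative and may vary across sqlglot versions:
#
#     >>> from sqlglot import parse_one
#     >>> parse_one("SELECT TO_TIMESTAMP('2020-01-01 00:00:00')", read="snowflake").sql()
#     "SELECT CAST('2020-01-01 00:00:00' AS TIMESTAMP)"
#     >>> parse_one("SELECT TO_TIMESTAMP(s, 'yyyy-mm-dd')", read="snowflake").sql("duckdb")
#     "SELECT STRPTIME(s, '%Y-%m-%d')"
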
def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))

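# Illustrative sketch (added for exposition) of the DIV0 rewrite above; the
# output is indicative:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(x, y)", read="snowflake", write="duckdb")[0]
#     'SELECT CASE WHEN y = 0 AND NOT x IS NULL THEN 0 ELSE x / y END'
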
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll be
            # unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression

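# Illustrative sketch (added for exposition) of the transform above; the output
# below is abridged and indicative, the real SQL spells out the full projection:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
#     ...     read="bigquery",
#     ...     write="snowflake",
#     ... )[0]
#     "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST(... AS DATE)) AS value
#      FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(WEEK, ...) + 1))) ...)"
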
def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    c.replace(exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs]))

    return expression

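# Illustrative sketch (added for exposition) of how _json_extract_value_array_sql
# above renders; the output shape is indicative:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT JSON_EXTRACT_ARRAY(j, '$.a')", read="bigquery", write="snowflake")[0]
#     "SELECT TRANSFORM(GET_PATH(j, 'a'), x -> PARSE_JSON(TO_JSON(x)))"
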
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

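        # Illustrative sketch (added for exposition) of the NOT IN rewrite above;
        # the output is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM t WHERE a NOT IN (SELECT b FROM s)", read="snowflake", write="snowflake")[0]
        #     'SELECT * FROM t WHERE a <> ALL (SELECT b FROM s)'
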
        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

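        # Illustrative sketch (added for exposition): IDENTIFIER(...) arguments
        # parse into an anonymous IDENTIFIER call and round-trip unchanged; the
        # output is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM IDENTIFIER('my_table')", read="snowflake", write="snowflake")[0]
        #     "SELECT * FROM IDENTIFIER('my_table')"
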
        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in the case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "EXCLUDE": TokenType.EXCEPT,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STAGE": TokenType.STAGE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

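        # Illustrative sketch (added for exposition): the settings above make
        # `$$ ... $$` tokenize as a raw string and `//` start a line comment;
        # the output is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT $$a b$$ // comment", read="snowflake", write="snowflake")[0]
        #     "SELECT 'a b' /* comment */"
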
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

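        # Note (added for exposition): VALUES rows containing maps or structs fall
        # back to the base generator with values_as_table=False, i.e. they are
        # generated as a SELECT ... UNION ALL ... subquery instead of VALUES (...).
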
        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM "

            return f"{value}{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""

            order = expression.args.get("order")
            if order is not None:
                order_clause = " ORDER" if order else " NOORDER"
            else:
                order_clause = ""

            return f"AUTOINCREMENT{start}{increment}{order_clause}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if this.is_string:
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            is_materialized = expression.find(exp.MaterializedProperty)
            copy_grants_property = expression.find(exp.CopyGrantsProperty)

            if expression.kind == "VIEW" and is_materialized and copy_grants_property:
                # For materialized views, COPY GRANTS is located *before* the columns list.
                # This is in contrast to normal views, where COPY GRANTS is located *after* it.
                # We default CopyGrantsProperty to POST_SCHEMA, which means we need to output
                # it POST_NAME when a materialized view is detected.
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
                post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
                post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

                this_name = self.sql(expression.this, "this")
                copy_grants = self.sql(copy_grants_property)
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

            return super().createable_sql(expression, locations)

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            this = expression.this

            # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
            # and add it later as part of the WITHIN GROUP clause
            order = this if isinstance(this, exp.Order) else None
            if order:
                expression.set("this", order.this.pop())

            expr_sql = super().arrayagg_sql(expression)

            if order:
                expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

            return expr_sql

        def array_sql(self, expression: exp.Array) -> str:
            expressions = expression.expressions

            first_expr = seq_get(expressions, 0)
            if isinstance(first_expr, exp.Select):
                # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
                if first_expr.text("kind").upper() == "STRUCT":
                    object_construct_args = []
                    for expr in first_expr.expressions:
                        # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                        # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                        name = expr.this if isinstance(expr, exp.Alias) else expr

                        object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                    array_agg = exp.ArrayAgg(
                        this=_build_object_construct(args=object_construct_args)
                    )

                    first_expr.set("kind", None)
                    first_expr.set("expressions", [array_agg])

                    return self.sql(first_expr.subquery())

            return inline_array_sql(self, expression)
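
# Illustrative end-to-end sketches (added for exposition) of the dialect defined
# above; outputs are indicative and may vary across sqlglot versions:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT IFF(x > 0, 'pos', 'neg')", read="snowflake", write="duckdb")[0]
#     "SELECT CASE WHEN x > 0 THEN 'pos' ELSE 'neg' END"
#     >>> sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="bigquery", write="snowflake")[0]
#     'SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)'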
t.Optional[exp.Expression]: 646 if not this: 647 return this 648 649 query = this.args.get("query") 650 if isinstance(this, exp.In) and isinstance(query, exp.Query): 651 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 652 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 653 # which can produce different results (most likely a SnowFlake bug). 654 # 655 # https://docs.snowflake.com/en/sql-reference/functions/in 656 # Context: https://github.com/tobymao/sqlglot/issues/3890 657 return self.expression( 658 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 659 ) 660 661 return self.expression(exp.Not, this=this) 662 663 def _parse_tag(self) -> exp.Tags: 664 return self.expression( 665 exp.Tags, 666 expressions=self._parse_wrapped_csv(self._parse_property), 667 ) 668 669 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 670 if self._prev.token_type != TokenType.WITH: 671 self._retreat(self._index - 1) 672 673 if self._match_text_seq("MASKING", "POLICY"): 674 policy = self._parse_column() 675 return self.expression( 676 exp.MaskingPolicyColumnConstraint, 677 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 678 expressions=self._match(TokenType.USING) 679 and self._parse_wrapped_csv(self._parse_id_var), 680 ) 681 if self._match_text_seq("PROJECTION", "POLICY"): 682 policy = self._parse_column() 683 return self.expression( 684 exp.ProjectionPolicyColumnConstraint, 685 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 686 ) 687 if self._match(TokenType.TAG): 688 return self._parse_tag() 689 690 return None 691 692 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 693 if self._match(TokenType.TAG): 694 return self._parse_tag() 695 696 return super()._parse_with_property() 697 698 def _parse_create(self) -> exp.Create | exp.Command: 699 expression = super()._parse_create() 700 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 701 # Replace the Table node with the enclosed Identifier 702 expression.this.replace(expression.this.this) 703 704 return expression 705 706 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 707 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 708 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 709 this = self._parse_var() or self._parse_type() 710 711 if not this: 712 return None 713 714 self._match(TokenType.COMMA) 715 expression = self._parse_bitwise() 716 this = map_date_part(this) 717 name = this.name.upper() 718 719 if name.startswith("EPOCH"): 720 if name == "EPOCH_MILLISECOND": 721 scale = 10**3 722 elif name == "EPOCH_MICROSECOND": 723 scale = 10**6 724 elif name == "EPOCH_NANOSECOND": 725 scale = 10**9 726 else: 727 scale = None 728 729 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 730 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 731 732 if scale: 733 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 734 735 return to_unix 736 737 return self.expression(exp.Extract, this=this, expression=expression) 738 739 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 740 if is_map: 741 # Keys are strings in Snowflake's objects, see also: 742 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 743 # - 
https://docs.snowflake.com/en/sql-reference/functions/object_construct 744 return self._parse_slice(self._parse_string()) 745 746 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 747 748 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 749 lateral = super()._parse_lateral() 750 if not lateral: 751 return lateral 752 753 if isinstance(lateral.this, exp.Explode): 754 table_alias = lateral.args.get("alias") 755 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 756 if table_alias and not table_alias.args.get("columns"): 757 table_alias.set("columns", columns) 758 elif not table_alias: 759 exp.alias_(lateral, "_flattened", table=columns, copy=False) 760 761 return lateral 762 763 def _parse_table_parts( 764 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 765 ) -> exp.Table: 766 # https://docs.snowflake.com/en/user-guide/querying-stage 767 if self._match(TokenType.STRING, advance=False): 768 table = self._parse_string() 769 elif self._match_text_seq("@", advance=False): 770 table = self._parse_location_path() 771 else: 772 table = None 773 774 if table: 775 file_format = None 776 pattern = None 777 778 wrapped = self._match(TokenType.L_PAREN) 779 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 780 if self._match_text_seq("FILE_FORMAT", "=>"): 781 file_format = self._parse_string() or super()._parse_table_parts( 782 is_db_reference=is_db_reference 783 ) 784 elif self._match_text_seq("PATTERN", "=>"): 785 pattern = self._parse_string() 786 else: 787 break 788 789 self._match(TokenType.COMMA) 790 791 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 792 else: 793 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 794 795 return table 796 797 def _parse_table( 798 self, 799 schema: bool = False, 800 joins: bool = False, 801 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 802 parse_bracket: bool = False, 803 is_db_reference: bool = False, 804 parse_partition: bool = False, 805 consume_pipe: bool = False, 806 ) -> t.Optional[exp.Expression]: 807 table = super()._parse_table( 808 schema=schema, 809 joins=joins, 810 alias_tokens=alias_tokens, 811 parse_bracket=parse_bracket, 812 is_db_reference=is_db_reference, 813 parse_partition=parse_partition, 814 ) 815 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 816 table_from_rows = table.this 817 for arg in exp.TableFromRows.arg_types: 818 if arg != "this": 819 table_from_rows.set(arg, table.args.get(arg)) 820 821 table = table_from_rows 822 823 return table 824 825 def _parse_id_var( 826 self, 827 any_token: bool = True, 828 tokens: t.Optional[t.Collection[TokenType]] = None, 829 ) -> t.Optional[exp.Expression]: 830 if self._match_text_seq("IDENTIFIER", "("): 831 identifier = ( 832 super()._parse_id_var(any_token=any_token, tokens=tokens) 833 or self._parse_string() 834 ) 835 self._match_r_paren() 836 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 837 838 return super()._parse_id_var(any_token=any_token, tokens=tokens) 839 840 def _parse_show_snowflake(self, this: str) -> exp.Show: 841 scope = None 842 scope_kind = None 843 844 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 845 # which is syntactically valid but has no effect on the output 846 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 847 848 history = self._match_text_seq("HISTORY") 849 850 like = self._parse_string() 
if self._match(TokenType.LIKE) else None 851 852 if self._match(TokenType.IN): 853 if self._match_text_seq("ACCOUNT"): 854 scope_kind = "ACCOUNT" 855 elif self._match_text_seq("CLASS"): 856 scope_kind = "CLASS" 857 scope = self._parse_table_parts() 858 elif self._match_text_seq("APPLICATION"): 859 scope_kind = "APPLICATION" 860 if self._match_text_seq("PACKAGE"): 861 scope_kind += " PACKAGE" 862 scope = self._parse_table_parts() 863 elif self._match_set(self.DB_CREATABLES): 864 scope_kind = self._prev.text.upper() 865 if self._curr: 866 scope = self._parse_table_parts() 867 elif self._curr: 868 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 869 scope = self._parse_table_parts() 870 871 return self.expression( 872 exp.Show, 873 **{ 874 "terse": terse, 875 "this": this, 876 "history": history, 877 "like": like, 878 "scope": scope, 879 "scope_kind": scope_kind, 880 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 881 "limit": self._parse_limit(), 882 "from": self._parse_string() if self._match(TokenType.FROM) else None, 883 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 884 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 885 }, 886 ) 887 888 def _parse_put(self) -> exp.Put | exp.Command: 889 if self._curr.token_type != TokenType.STRING: 890 return self._parse_as_command(self._prev) 891 892 return self.expression( 893 exp.Put, 894 this=self._parse_string(), 895 target=self._parse_location_path(), 896 properties=self._parse_properties(), 897 ) 898 899 def _parse_get(self) -> t.Optional[exp.Expression]: 900 start = self._prev 901 902 # If we detect GET( then we need to parse a function, not a statement 903 if self._match(TokenType.L_PAREN): 904 self._retreat(self._index - 2) 905 return self._parse_expression() 906 907 target = self._parse_location_path() 908 909 # Parse as command if unquoted file path 910 if self._curr.token_type == TokenType.URI_START: 911 return self._parse_as_command(start) 912 913 return self.expression( 914 exp.Get, 915 this=self._parse_string(), 916 target=target, 917 properties=self._parse_properties(), 918 ) 919 920 def _parse_location_property(self) -> exp.LocationProperty: 921 self._match(TokenType.EQ) 922 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 923 924 def _parse_file_location(self) -> t.Optional[exp.Expression]: 925 # Parse either a subquery or a staged file 926 return ( 927 self._parse_select(table=True, parse_subquery_alias=False) 928 if self._match(TokenType.L_PAREN, advance=False) 929 else self._parse_table_parts() 930 ) 931 932 def _parse_location_path(self) -> exp.Var: 933 start = self._curr 934 self._advance_any(ignore_reserved=True) 935 936 # We avoid consuming a comma token because external tables like @foo and @bar 937 # can be joined in a query with a comma separator, as well as closing paren 938 # in case of subqueries 939 while self._is_connected() and not self._match_set( 940 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 941 ): 942 self._advance_any(ignore_reserved=True) 943 944 return exp.var(self._find_sql(start, self._prev)) 945 946 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 947 this = super()._parse_lambda_arg() 948 949 if not this: 950 return this 951 952 typ = self._parse_types() 953 954 if typ: 955 return self.expression(exp.Cast, this=this, to=typ) 956 957 return this 958 959 def _parse_foreign_key(self) -> exp.ForeignKey: 960 # inlineFK, the REFERENCES columns are implied 961 if 
self._match(TokenType.REFERENCES, advance=False): 962 return self.expression(exp.ForeignKey) 963 964 # outoflineFK, explicitly names the columns 965 return super()._parse_foreign_key() 966 967 def _parse_file_format_property(self) -> exp.FileFormatProperty: 968 self._match(TokenType.EQ) 969 if self._match(TokenType.L_PAREN, advance=False): 970 expressions = self._parse_wrapped_options() 971 else: 972 expressions = [self._parse_format_name()] 973 974 return self.expression( 975 exp.FileFormatProperty, 976 expressions=expressions, 977 ) 978 979 def _parse_credentials_property(self) -> exp.CredentialsProperty: 980 return self.expression( 981 exp.CredentialsProperty, 982 expressions=self._parse_wrapped_options(), 983 ) 984 985 class Tokenizer(tokens.Tokenizer): 986 STRING_ESCAPES = ["\\", "'"] 987 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 988 RAW_STRINGS = ["$$"] 989 COMMENTS = ["--", "//", ("/*", "*/")] 990 NESTED_COMMENTS = False 991 992 KEYWORDS = { 993 **tokens.Tokenizer.KEYWORDS, 994 "FILE://": TokenType.URI_START, 995 "BYTEINT": TokenType.INT, 996 "EXCLUDE": TokenType.EXCEPT, 997 "FILE FORMAT": TokenType.FILE_FORMAT, 998 "GET": TokenType.GET, 999 "ILIKE ANY": TokenType.ILIKE_ANY, 1000 "LIKE ANY": TokenType.LIKE_ANY, 1001 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1002 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1003 "MINUS": TokenType.EXCEPT, 1004 "NCHAR VARYING": TokenType.VARCHAR, 1005 "PUT": TokenType.PUT, 1006 "REMOVE": TokenType.COMMAND, 1007 "RM": TokenType.COMMAND, 1008 "SAMPLE": TokenType.TABLE_SAMPLE, 1009 "SQL_DOUBLE": TokenType.DOUBLE, 1010 "SQL_VARCHAR": TokenType.VARCHAR, 1011 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1012 "TAG": TokenType.TAG, 1013 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1014 "TOP": TokenType.TOP, 1015 "WAREHOUSE": TokenType.WAREHOUSE, 1016 "STAGE": TokenType.STAGE, 1017 "STREAMLIT": TokenType.STREAMLIT, 1018 } 1019 KEYWORDS.pop("/*+") 1020 1021 SINGLE_TOKENS = { 1022 **tokens.Tokenizer.SINGLE_TOKENS, 1023 "$": TokenType.PARAMETER, 1024 } 1025 1026 VAR_SINGLE_TOKENS = {"$"} 1027 1028 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1029 1030 class Generator(generator.Generator): 1031 PARAMETER_TOKEN = "$" 1032 MATCHED_BY_SOURCE = False 1033 SINGLE_STRING_INTERVAL = True 1034 JOIN_HINTS = False 1035 TABLE_HINTS = False 1036 QUERY_HINTS = False 1037 AGGREGATE_FILTER_SUPPORTED = False 1038 SUPPORTS_TABLE_COPY = False 1039 COLLATE_IS_FUNC = True 1040 LIMIT_ONLY_LITERALS = True 1041 JSON_KEY_VALUE_PAIR_SEP = "," 1042 INSERT_OVERWRITE = " OVERWRITE INTO" 1043 STRUCT_DELIMITER = ("(", ")") 1044 COPY_PARAMS_ARE_WRAPPED = False 1045 COPY_PARAMS_EQ_REQUIRED = True 1046 STAR_EXCEPT = "EXCLUDE" 1047 SUPPORTS_EXPLODING_PROJECTIONS = False 1048 ARRAY_CONCAT_IS_VAR_LEN = False 1049 SUPPORTS_CONVERT_TIMEZONE = True 1050 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1051 SUPPORTS_MEDIAN = True 1052 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1053 1054 TRANSFORMS = { 1055 **generator.Generator.TRANSFORMS, 1056 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1057 exp.ArgMax: rename_func("MAX_BY"), 1058 exp.ArgMin: rename_func("MIN_BY"), 1059 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1060 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1061 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1062 exp.AtTimeZone: lambda self, e: self.func( 1063 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1064 ), 1065 exp.BitwiseOr: rename_func("BITOR"), 1066 exp.BitwiseXor: rename_func("BITXOR"), 1067 
exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1068 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1069 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1070 exp.DateAdd: date_delta_sql("DATEADD"), 1071 exp.DateDiff: date_delta_sql("DATEDIFF"), 1072 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1073 exp.DatetimeDiff: timestampdiff_sql, 1074 exp.DateStrToDate: datestrtodate_sql, 1075 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1076 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1077 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1078 exp.DayOfYear: rename_func("DAYOFYEAR"), 1079 exp.Explode: rename_func("FLATTEN"), 1080 exp.Extract: lambda self, e: self.func( 1081 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1082 ), 1083 exp.FileFormatProperty: lambda self, 1084 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1085 exp.FromTimeZone: lambda self, e: self.func( 1086 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1087 ), 1088 exp.GenerateSeries: lambda self, e: self.func( 1089 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1090 ), 1091 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1092 exp.If: if_sql(name="IFF", false_value="NULL"), 1093 exp.JSONExtractArray: _json_extract_value_array_sql, 1094 exp.JSONExtractScalar: lambda self, e: self.func( 1095 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1096 ), 1097 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1098 exp.JSONPathRoot: lambda *_: "", 1099 exp.JSONValueArray: _json_extract_value_array_sql, 1100 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1101 rename_func("EDITDISTANCE") 1102 ), 1103 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1104 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1105 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1106 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1107 exp.MakeInterval: no_make_interval_sql, 1108 exp.Max: max_or_greatest, 1109 exp.Min: min_or_least, 1110 exp.ParseJSON: lambda self, e: self.func( 1111 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1112 ), 1113 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1114 exp.PercentileCont: transforms.preprocess( 1115 [transforms.add_within_group_for_percentiles] 1116 ), 1117 exp.PercentileDisc: transforms.preprocess( 1118 [transforms.add_within_group_for_percentiles] 1119 ), 1120 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1121 exp.RegexpExtract: _regexpextract_sql, 1122 exp.RegexpExtractAll: _regexpextract_sql, 1123 exp.RegexpILike: _regexpilike_sql, 1124 exp.Rand: rename_func("RANDOM"), 1125 exp.Select: transforms.preprocess( 1126 [ 1127 transforms.eliminate_window_clause, 1128 transforms.eliminate_distinct_on, 1129 transforms.explode_projection_to_unnest(), 1130 transforms.eliminate_semi_and_anti_joins, 1131 _transform_generate_date_array, 1132 _eliminate_dot_variant_lookup, 1133 ] 1134 ), 1135 exp.SHA: rename_func("SHA1"), 1136 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1137 exp.StartsWith: rename_func("STARTSWITH"), 1138 exp.EndsWith: rename_func("ENDSWITH"), 1139 exp.StrPosition: lambda self, e: strposition_sql( 1140 self, e, func_name="CHARINDEX", supports_position=True 1141 ), 1142 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1143 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1144 exp.Stuff: 
rename_func("INSERT"), 1145 exp.StPoint: rename_func("ST_MAKEPOINT"), 1146 exp.TimeAdd: date_delta_sql("TIMEADD"), 1147 exp.Timestamp: no_timestamp_sql, 1148 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1149 exp.TimestampDiff: lambda self, e: self.func( 1150 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1151 ), 1152 exp.TimestampTrunc: timestamptrunc_sql(), 1153 exp.TimeStrToTime: timestrtotime_sql, 1154 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1155 exp.ToArray: rename_func("TO_ARRAY"), 1156 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1157 exp.ToDouble: rename_func("TO_DOUBLE"), 1158 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1159 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1160 exp.TsOrDsToDate: lambda self, e: self.func( 1161 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1162 ), 1163 exp.TsOrDsToTime: lambda self, e: self.func( 1164 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1165 ), 1166 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1167 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1168 exp.Uuid: rename_func("UUID_STRING"), 1169 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1170 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1171 exp.Xor: rename_func("BOOLXOR"), 1172 } 1173 1174 SUPPORTED_JSON_PATH_PARTS = { 1175 exp.JSONPathKey, 1176 exp.JSONPathRoot, 1177 exp.JSONPathSubscript, 1178 } 1179 1180 TYPE_MAPPING = { 1181 **generator.Generator.TYPE_MAPPING, 1182 exp.DataType.Type.NESTED: "OBJECT", 1183 exp.DataType.Type.STRUCT: "OBJECT", 1184 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1185 } 1186 1187 TOKEN_MAPPING = { 1188 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1189 } 1190 1191 PROPERTIES_LOCATION = { 1192 **generator.Generator.PROPERTIES_LOCATION, 1193 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1194 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1195 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1196 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1197 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1198 } 1199 1200 UNSUPPORTED_VALUES_EXPRESSIONS = { 1201 exp.Map, 1202 exp.StarMap, 1203 exp.Struct, 1204 exp.VarMap, 1205 } 1206 1207 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1208 1209 def with_properties(self, properties: exp.Properties) -> str: 1210 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1211 1212 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1213 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1214 values_as_table = False 1215 1216 return super().values_sql(expression, values_as_table=values_as_table) 1217 1218 def datatype_sql(self, expression: exp.DataType) -> str: 1219 expressions = expression.expressions 1220 if ( 1221 expressions 1222 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1223 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1224 ): 1225 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1226 return "OBJECT" 1227 1228 return super().datatype_sql(expression) 1229 1230 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1231 return self.func( 1232 "TO_NUMBER", 1233 expression.this, 1234 expression.args.get("format"), 1235 expression.args.get("precision"), 1236 expression.args.get("scale"), 1237 ) 1238 1239 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1240 
milli = expression.args.get("milli") 1241 if milli is not None: 1242 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1243 expression.set("nano", milli_to_nano) 1244 1245 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1246 1247 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1248 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1249 return self.func("TO_GEOGRAPHY", expression.this) 1250 if expression.is_type(exp.DataType.Type.GEOMETRY): 1251 return self.func("TO_GEOMETRY", expression.this) 1252 1253 return super().cast_sql(expression, safe_prefix=safe_prefix) 1254 1255 def trycast_sql(self, expression: exp.TryCast) -> str: 1256 value = expression.this 1257 1258 if value.type is None: 1259 from sqlglot.optimizer.annotate_types import annotate_types 1260 1261 value = annotate_types(value, dialect=self.dialect) 1262 1263 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1264 return super().trycast_sql(expression) 1265 1266 # TRY_CAST only works for string values in Snowflake 1267 return self.cast_sql(expression) 1268 1269 def log_sql(self, expression: exp.Log) -> str: 1270 if not expression.expression: 1271 return self.func("LN", expression.this) 1272 1273 return super().log_sql(expression) 1274 1275 def unnest_sql(self, expression: exp.Unnest) -> str: 1276 unnest_alias = expression.args.get("alias") 1277 offset = expression.args.get("offset") 1278 1279 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1280 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1281 1282 columns = [ 1283 exp.to_identifier("seq"), 1284 exp.to_identifier("key"), 1285 exp.to_identifier("path"), 1286 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1287 value, 1288 exp.to_identifier("this"), 1289 ] 1290 1291 if unnest_alias: 1292 unnest_alias.set("columns", columns) 1293 else: 1294 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1295 1296 table_input = self.sql(expression.expressions[0]) 1297 if not table_input.startswith("INPUT =>"): 1298 table_input = f"INPUT => {table_input}" 1299 1300 explode = f"TABLE(FLATTEN({table_input}))" 1301 alias = self.sql(unnest_alias) 1302 alias = f" AS {alias}" if alias else "" 1303 value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM " 1304 1305 return f"{value}{explode}{alias}" 1306 1307 def show_sql(self, expression: exp.Show) -> str: 1308 terse = "TERSE " if expression.args.get("terse") else "" 1309 history = " HISTORY" if expression.args.get("history") else "" 1310 like = self.sql(expression, "like") 1311 like = f" LIKE {like}" if like else "" 1312 1313 scope = self.sql(expression, "scope") 1314 scope = f" {scope}" if scope else "" 1315 1316 scope_kind = self.sql(expression, "scope_kind") 1317 if scope_kind: 1318 scope_kind = f" IN {scope_kind}" 1319 1320 starts_with = self.sql(expression, "starts_with") 1321 if starts_with: 1322 starts_with = f" STARTS WITH {starts_with}" 1323 1324 limit = self.sql(expression, "limit") 1325 1326 from_ = self.sql(expression, "from") 1327 if from_: 1328 from_ = f" FROM {from_}" 1329 1330 privileges = self.expressions(expression, key="privileges", flat=True) 1331 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1332 1333 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1334 1335 def describe_sql(self, expression: exp.Describe) -> str: 1336 # Default to table if kind is 
unknown 1337 kind_value = expression.args.get("kind") or "TABLE" 1338 kind = f" {kind_value}" if kind_value else "" 1339 this = f" {self.sql(expression, 'this')}" 1340 expressions = self.expressions(expression, flat=True) 1341 expressions = f" {expressions}" if expressions else "" 1342 return f"DESCRIBE{kind}{this}{expressions}" 1343 1344 def generatedasidentitycolumnconstraint_sql( 1345 self, expression: exp.GeneratedAsIdentityColumnConstraint 1346 ) -> str: 1347 start = expression.args.get("start") 1348 start = f" START {start}" if start else "" 1349 increment = expression.args.get("increment") 1350 increment = f" INCREMENT {increment}" if increment else "" 1351 1352 order = expression.args.get("order") 1353 if order is not None: 1354 order_clause = " ORDER" if order else " NOORDER" 1355 else: 1356 order_clause = "" 1357 1358 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1359 1360 def cluster_sql(self, expression: exp.Cluster) -> str: 1361 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1362 1363 def struct_sql(self, expression: exp.Struct) -> str: 1364 keys = [] 1365 values = [] 1366 1367 for i, e in enumerate(expression.expressions): 1368 if isinstance(e, exp.PropertyEQ): 1369 keys.append( 1370 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1371 ) 1372 values.append(e.expression) 1373 else: 1374 keys.append(exp.Literal.string(f"_{i}")) 1375 values.append(e) 1376 1377 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1378 1379 @unsupported_args("weight", "accuracy") 1380 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1381 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1382 1383 def alterset_sql(self, expression: exp.AlterSet) -> str: 1384 exprs = self.expressions(expression, flat=True) 1385 exprs = f" {exprs}" if exprs else "" 1386 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1387 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1388 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1389 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1390 tag = self.expressions(expression, key="tag", flat=True) 1391 tag = f" TAG {tag}" if tag else "" 1392 1393 return f"SET{exprs}{file_format}{copy_options}{tag}" 1394 1395 def strtotime_sql(self, expression: exp.StrToTime): 1396 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1397 return self.func( 1398 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1399 ) 1400 1401 def timestampsub_sql(self, expression: exp.TimestampSub): 1402 return self.sql( 1403 exp.TimestampAdd( 1404 this=expression.this, 1405 expression=expression.expression * -1, 1406 unit=expression.unit, 1407 ) 1408 ) 1409 1410 def jsonextract_sql(self, expression: exp.JSONExtract): 1411 this = expression.this 1412 1413 # JSON strings are valid coming from other dialects such as BQ 1414 return self.func( 1415 "GET_PATH", 1416 exp.ParseJSON(this=this) if this.is_string else this, 1417 expression.expression, 1418 ) 1419 1420 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1421 this = expression.this 1422 if this.is_string: 1423 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1424 1425 return self.func("TO_CHAR", this, self.format_time(expression)) 1426 1427 def datesub_sql(self, expression: exp.DateSub) -> str: 1428 value = expression.expression 1429 if value: 1430 
value.replace(value * (-1)) 1431 else: 1432 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1433 1434 return date_delta_sql("DATEADD")(self, expression) 1435 1436 def select_sql(self, expression: exp.Select) -> str: 1437 limit = expression.args.get("limit") 1438 offset = expression.args.get("offset") 1439 if offset and not limit: 1440 expression.limit(exp.Null(), copy=False) 1441 return super().select_sql(expression) 1442 1443 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1444 is_materialized = expression.find(exp.MaterializedProperty) 1445 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1446 1447 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1448 # For materialized views, COPY GRANTS is located *before* the columns list 1449 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1450 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1451 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1452 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1453 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1454 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1455 1456 this_name = self.sql(expression.this, "this") 1457 copy_grants = self.sql(copy_grants_property) 1458 this_schema = self.schema_columns_sql(expression.this) 1459 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1460 1461 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1462 1463 return super().createable_sql(expression, locations) 1464 1465 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1466 this = expression.this 1467 1468 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1469 # and add it later as part of the WITHIN GROUP clause 1470 order = this if isinstance(this, exp.Order) else None 1471 if order: 1472 expression.set("this", order.this.pop()) 1473 1474 expr_sql = super().arrayagg_sql(expression) 1475 1476 if order: 1477 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1478 1479 return expr_sql 1480 1481 def array_sql(self, expression: exp.Array) -> str: 1482 expressions = expression.expressions 1483 1484 first_expr = seq_get(expressions, 0) 1485 if isinstance(first_expr, exp.Select): 1486 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1487 if first_expr.text("kind").upper() == "STRUCT": 1488 object_construct_args = [] 1489 for expr in first_expr.expressions: 1490 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1491 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1492 name = expr.this if isinstance(expr, exp.Alias) else expr 1493 1494 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1495 1496 array_agg = exp.ArrayAgg( 1497 this=_build_object_construct(args=object_construct_args) 1498 ) 1499 1500 first_expr.set("kind", None) 1501 first_expr.set("expressions", [array_agg]) 1502 1503 return self.sql(first_expr.subquery()) 1504 1505 return inline_array_sql(self, expression)
Specifies the strategy according to which identifiers should be normalized.
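A minimal sketch of what the UPPERCASE strategy means in practice, using sqlglot's identifier normalizer (the table and column names are made up for illustration):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers normalize to uppercase under Snowflake's UPPERCASE
# strategy, while quoted identifiers keep their original casing.
expr = sqlglot.parse_one('SELECT foo, "Bar" FROM t', read="snowflake")
print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
# SELECT FOO, "Bar" FROM T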
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
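This rewrite is applied during column qualification; a minimal sketch (the exact identifier casing in the output depends on normalization):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# The CTE alias column c is pushed onto the SUM(a) projection.
qualified = qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake")
print(qualified.sql(dialect="snowflake"))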
Associates this dialect's time formats with their equivalent Python strftime formats.
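These mappings drive format-string translation during transpilation. A small sketch, assuming current sqlglot behavior (the DuckDB output shown is the expected shape, not a guarantee):

import sqlglot

# Snowflake's 'YYYY-MM-DD' tokens map to the strftime-style '%Y-%m-%d',
# which DuckDB's STRPTIME consumes.
print(sqlglot.transpile(
    "SELECT TO_TIMESTAMP('2024-01-02', 'YYYY-MM-DD')",
    read="snowflake",
    write="duckdb",
)[0])
# e.g. SELECT STRPTIME('2024-01-02', '%Y-%m-%d')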
411 def quote_identifier(self, expression: E, identify: bool = True) -> E: 412 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 413 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 414 if ( 415 isinstance(expression, exp.Identifier) 416 and isinstance(expression.parent, exp.Table) 417 and expression.name.lower() == "dual" 418 ): 419 return expression # type: ignore 420 421 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
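A short sketch of the DUAL carve-out described above:

from sqlglot import exp, parse_one
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# A standalone identifier is quoted as requested...
print(dialect.quote_identifier(exp.to_identifier("dual")).sql(dialect="snowflake"))  # "dual"

# ...but DUAL used as a table reference is returned unchanged, since Snowflake
# treats the unquoted DUAL keyword specially in FROM clauses.
table = parse_one("SELECT 1 FROM dual", read="snowflake").args["from"].this
print(dialect.quote_identifier(table.this).sql(dialect="snowflake"))  # dual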
Mapping of an escaped sequence (e.g. "\n", i.e. a backslash followed by n) to its unescaped version (the corresponding literal character, such as a newline).
423 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 424 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 425 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
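Because the JSON-path tokenizer above removes "$" from its single tokens, Snowflake-style paths without a leading "$" still parse. A hedged sketch of how such a path is re-rooted when generating for another dialect:

import sqlglot

# GET_PATH's second argument is parsed as a JSON path and rendered with an
# explicit '$' root for dialects that require one.
print(sqlglot.transpile("SELECT GET_PATH(col, 'a.b[0]')", read="snowflake", write="duckdb")[0])
# e.g. SELECT col -> '$.a.b[0]'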
427 class Parser(parser.Parser): 428 IDENTIFY_PIVOT_STRINGS = True 429 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 430 COLON_IS_VARIANT_EXTRACT = True 431 432 ID_VAR_TOKENS = { 433 *parser.Parser.ID_VAR_TOKENS, 434 TokenType.MATCH_CONDITION, 435 } 436 437 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 438 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 439 440 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 441 442 FUNCTIONS = { 443 **parser.Parser.FUNCTIONS, 444 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 445 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 446 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 447 this=seq_get(args, 1), expression=seq_get(args, 0) 448 ), 449 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 450 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 451 start=seq_get(args, 0), 452 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 453 step=seq_get(args, 2), 454 ), 455 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 456 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 457 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 458 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 459 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 460 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 461 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 462 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 463 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 464 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 465 "DATE_TRUNC": _date_trunc_to_time, 466 "DATEADD": _build_date_time_add(exp.DateAdd), 467 "DATEDIFF": _build_datediff, 468 "DIV0": _build_if_from_div0, 469 "EDITDISTANCE": lambda args: exp.Levenshtein( 470 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 471 ), 472 "FLATTEN": exp.Explode.from_arg_list, 473 "GET_PATH": lambda args, dialect: exp.JSONExtract( 474 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 475 ), 476 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 477 "IFF": exp.If.from_arg_list, 478 "LAST_DAY": lambda args: exp.LastDay( 479 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 480 ), 481 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 482 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 483 "NULLIFZERO": _build_if_from_nullifzero, 484 "OBJECT_CONSTRUCT": _build_object_construct, 485 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 486 "REGEXP_REPLACE": _build_regexp_replace, 487 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 488 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 489 "REPLACE": build_replace_with_optional_replacement, 490 "RLIKE": exp.RegexpLike.from_arg_list, 491 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 492 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 493 "TIMEADD": _build_date_time_add(exp.TimeAdd), 494 "TIMEDIFF": _build_datediff, 495 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 496 "TIMESTAMPDIFF": _build_datediff, 497 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 498 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 499 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 500 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 501 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 
0), safe=True), 502 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 503 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 504 "TRY_TO_TIMESTAMP": _build_datetime( 505 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 506 ), 507 "TO_CHAR": build_timetostr_or_tochar, 508 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 509 "TO_NUMBER": lambda args: exp.ToNumber( 510 this=seq_get(args, 0), 511 format=seq_get(args, 1), 512 precision=seq_get(args, 2), 513 scale=seq_get(args, 3), 514 ), 515 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 516 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 517 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 518 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 519 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 520 "TO_VARCHAR": exp.ToChar.from_arg_list, 521 "ZEROIFNULL": _build_if_from_zeroifnull, 522 } 523 524 FUNCTION_PARSERS = { 525 **parser.Parser.FUNCTION_PARSERS, 526 "DATE_PART": lambda self: self._parse_date_part(), 527 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 528 "LISTAGG": lambda self: self._parse_string_agg(), 529 } 530 FUNCTION_PARSERS.pop("TRIM") 531 532 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 533 534 RANGE_PARSERS = { 535 **parser.Parser.RANGE_PARSERS, 536 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 537 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 538 } 539 540 ALTER_PARSERS = { 541 **parser.Parser.ALTER_PARSERS, 542 "UNSET": lambda self: self.expression( 543 exp.Set, 544 tag=self._match_text_seq("TAG"), 545 expressions=self._parse_csv(self._parse_id_var), 546 unset=True, 547 ), 548 } 549 550 STATEMENT_PARSERS = { 551 **parser.Parser.STATEMENT_PARSERS, 552 TokenType.GET: lambda self: self._parse_get(), 553 TokenType.PUT: lambda self: self._parse_put(), 554 TokenType.SHOW: lambda self: self._parse_show(), 555 } 556 557 PROPERTY_PARSERS = { 558 **parser.Parser.PROPERTY_PARSERS, 559 "CREDENTIALS": lambda self: self._parse_credentials_property(), 560 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 561 "LOCATION": lambda self: self._parse_location_property(), 562 "TAG": lambda self: self._parse_tag(), 563 "USING": lambda self: self._match_text_seq("TEMPLATE") 564 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 565 } 566 567 TYPE_CONVERTERS = { 568 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 569 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 570 } 571 572 SHOW_PARSERS = { 573 "DATABASES": _show_parser("DATABASES"), 574 "TERSE DATABASES": _show_parser("DATABASES"), 575 "SCHEMAS": _show_parser("SCHEMAS"), 576 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 577 "OBJECTS": _show_parser("OBJECTS"), 578 "TERSE OBJECTS": _show_parser("OBJECTS"), 579 "TABLES": _show_parser("TABLES"), 580 "TERSE TABLES": _show_parser("TABLES"), 581 "VIEWS": _show_parser("VIEWS"), 582 "TERSE VIEWS": _show_parser("VIEWS"), 583 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 584 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 585 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 586 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 587 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 588 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 589 "SEQUENCES": 
_show_parser("SEQUENCES"), 590 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 591 "STAGES": _show_parser("STAGES"), 592 "COLUMNS": _show_parser("COLUMNS"), 593 "USERS": _show_parser("USERS"), 594 "TERSE USERS": _show_parser("USERS"), 595 "FILE FORMATS": _show_parser("FILE FORMATS"), 596 "FUNCTIONS": _show_parser("FUNCTIONS"), 597 "PROCEDURES": _show_parser("PROCEDURES"), 598 "WAREHOUSES": _show_parser("WAREHOUSES"), 599 } 600 601 CONSTRAINT_PARSERS = { 602 **parser.Parser.CONSTRAINT_PARSERS, 603 "WITH": lambda self: self._parse_with_constraint(), 604 "MASKING": lambda self: self._parse_with_constraint(), 605 "PROJECTION": lambda self: self._parse_with_constraint(), 606 "TAG": lambda self: self._parse_with_constraint(), 607 } 608 609 STAGED_FILE_SINGLE_TOKENS = { 610 TokenType.DOT, 611 TokenType.MOD, 612 TokenType.SLASH, 613 } 614 615 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 616 617 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 618 619 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 620 621 LAMBDAS = { 622 **parser.Parser.LAMBDAS, 623 TokenType.ARROW: lambda self, expressions: self.expression( 624 exp.Lambda, 625 this=self._replace_lambda( 626 self._parse_assignment(), 627 expressions, 628 ), 629 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 630 ), 631 } 632 633 def _parse_use(self) -> exp.Use: 634 if self._match_text_seq("SECONDARY", "ROLES"): 635 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 636 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 637 return self.expression( 638 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 639 ) 640 641 return super()._parse_use() 642 643 def _negate_range( 644 self, this: t.Optional[exp.Expression] = None 645 ) -> t.Optional[exp.Expression]: 646 if not this: 647 return this 648 649 query = this.args.get("query") 650 if isinstance(this, exp.In) and isinstance(query, exp.Query): 651 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 652 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 653 # which can produce different results (most likely a SnowFlake bug). 
654 # 655 # https://docs.snowflake.com/en/sql-reference/functions/in 656 # Context: https://github.com/tobymao/sqlglot/issues/3890 657 return self.expression( 658 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 659 ) 660 661 return self.expression(exp.Not, this=this) 662 663 def _parse_tag(self) -> exp.Tags: 664 return self.expression( 665 exp.Tags, 666 expressions=self._parse_wrapped_csv(self._parse_property), 667 ) 668 669 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 670 if self._prev.token_type != TokenType.WITH: 671 self._retreat(self._index - 1) 672 673 if self._match_text_seq("MASKING", "POLICY"): 674 policy = self._parse_column() 675 return self.expression( 676 exp.MaskingPolicyColumnConstraint, 677 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 678 expressions=self._match(TokenType.USING) 679 and self._parse_wrapped_csv(self._parse_id_var), 680 ) 681 if self._match_text_seq("PROJECTION", "POLICY"): 682 policy = self._parse_column() 683 return self.expression( 684 exp.ProjectionPolicyColumnConstraint, 685 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 686 ) 687 if self._match(TokenType.TAG): 688 return self._parse_tag() 689 690 return None 691 692 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 693 if self._match(TokenType.TAG): 694 return self._parse_tag() 695 696 return super()._parse_with_property() 697 698 def _parse_create(self) -> exp.Create | exp.Command: 699 expression = super()._parse_create() 700 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 701 # Replace the Table node with the enclosed Identifier 702 expression.this.replace(expression.this.this) 703 704 return expression 705 706 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 707 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 708 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 709 this = self._parse_var() or self._parse_type() 710 711 if not this: 712 return None 713 714 self._match(TokenType.COMMA) 715 expression = self._parse_bitwise() 716 this = map_date_part(this) 717 name = this.name.upper() 718 719 if name.startswith("EPOCH"): 720 if name == "EPOCH_MILLISECOND": 721 scale = 10**3 722 elif name == "EPOCH_MICROSECOND": 723 scale = 10**6 724 elif name == "EPOCH_NANOSECOND": 725 scale = 10**9 726 else: 727 scale = None 728 729 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 730 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 731 732 if scale: 733 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 734 735 return to_unix 736 737 return self.expression(exp.Extract, this=this, expression=expression) 738 739 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 740 if is_map: 741 # Keys are strings in Snowflake's objects, see also: 742 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 743 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 744 return self._parse_slice(self._parse_string()) 745 746 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 747 748 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 749 lateral = super()._parse_lateral() 750 if not lateral: 751 return lateral 752 753 if isinstance(lateral.this, exp.Explode): 754 table_alias = lateral.args.get("alias") 
755 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 756 if table_alias and not table_alias.args.get("columns"): 757 table_alias.set("columns", columns) 758 elif not table_alias: 759 exp.alias_(lateral, "_flattened", table=columns, copy=False) 760 761 return lateral 762 763 def _parse_table_parts( 764 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 765 ) -> exp.Table: 766 # https://docs.snowflake.com/en/user-guide/querying-stage 767 if self._match(TokenType.STRING, advance=False): 768 table = self._parse_string() 769 elif self._match_text_seq("@", advance=False): 770 table = self._parse_location_path() 771 else: 772 table = None 773 774 if table: 775 file_format = None 776 pattern = None 777 778 wrapped = self._match(TokenType.L_PAREN) 779 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 780 if self._match_text_seq("FILE_FORMAT", "=>"): 781 file_format = self._parse_string() or super()._parse_table_parts( 782 is_db_reference=is_db_reference 783 ) 784 elif self._match_text_seq("PATTERN", "=>"): 785 pattern = self._parse_string() 786 else: 787 break 788 789 self._match(TokenType.COMMA) 790 791 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 792 else: 793 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 794 795 return table 796 797 def _parse_table( 798 self, 799 schema: bool = False, 800 joins: bool = False, 801 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 802 parse_bracket: bool = False, 803 is_db_reference: bool = False, 804 parse_partition: bool = False, 805 consume_pipe: bool = False, 806 ) -> t.Optional[exp.Expression]: 807 table = super()._parse_table( 808 schema=schema, 809 joins=joins, 810 alias_tokens=alias_tokens, 811 parse_bracket=parse_bracket, 812 is_db_reference=is_db_reference, 813 parse_partition=parse_partition, 814 ) 815 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 816 table_from_rows = table.this 817 for arg in exp.TableFromRows.arg_types: 818 if arg != "this": 819 table_from_rows.set(arg, table.args.get(arg)) 820 821 table = table_from_rows 822 823 return table 824 825 def _parse_id_var( 826 self, 827 any_token: bool = True, 828 tokens: t.Optional[t.Collection[TokenType]] = None, 829 ) -> t.Optional[exp.Expression]: 830 if self._match_text_seq("IDENTIFIER", "("): 831 identifier = ( 832 super()._parse_id_var(any_token=any_token, tokens=tokens) 833 or self._parse_string() 834 ) 835 self._match_r_paren() 836 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 837 838 return super()._parse_id_var(any_token=any_token, tokens=tokens) 839 840 def _parse_show_snowflake(self, this: str) -> exp.Show: 841 scope = None 842 scope_kind = None 843 844 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 845 # which is syntactically valid but has no effect on the output 846 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 847 848 history = self._match_text_seq("HISTORY") 849 850 like = self._parse_string() if self._match(TokenType.LIKE) else None 851 852 if self._match(TokenType.IN): 853 if self._match_text_seq("ACCOUNT"): 854 scope_kind = "ACCOUNT" 855 elif self._match_text_seq("CLASS"): 856 scope_kind = "CLASS" 857 scope = self._parse_table_parts() 858 elif self._match_text_seq("APPLICATION"): 859 scope_kind = "APPLICATION" 860 if self._match_text_seq("PACKAGE"): 861 scope_kind += " PACKAGE" 862 scope = self._parse_table_parts() 863 elif 
self._match_set(self.DB_CREATABLES): 864 scope_kind = self._prev.text.upper() 865 if self._curr: 866 scope = self._parse_table_parts() 867 elif self._curr: 868 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 869 scope = self._parse_table_parts() 870 871 return self.expression( 872 exp.Show, 873 **{ 874 "terse": terse, 875 "this": this, 876 "history": history, 877 "like": like, 878 "scope": scope, 879 "scope_kind": scope_kind, 880 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 881 "limit": self._parse_limit(), 882 "from": self._parse_string() if self._match(TokenType.FROM) else None, 883 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 884 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 885 }, 886 ) 887 888 def _parse_put(self) -> exp.Put | exp.Command: 889 if self._curr.token_type != TokenType.STRING: 890 return self._parse_as_command(self._prev) 891 892 return self.expression( 893 exp.Put, 894 this=self._parse_string(), 895 target=self._parse_location_path(), 896 properties=self._parse_properties(), 897 ) 898 899 def _parse_get(self) -> t.Optional[exp.Expression]: 900 start = self._prev 901 902 # If we detect GET( then we need to parse a function, not a statement 903 if self._match(TokenType.L_PAREN): 904 self._retreat(self._index - 2) 905 return self._parse_expression() 906 907 target = self._parse_location_path() 908 909 # Parse as command if unquoted file path 910 if self._curr.token_type == TokenType.URI_START: 911 return self._parse_as_command(start) 912 913 return self.expression( 914 exp.Get, 915 this=self._parse_string(), 916 target=target, 917 properties=self._parse_properties(), 918 ) 919 920 def _parse_location_property(self) -> exp.LocationProperty: 921 self._match(TokenType.EQ) 922 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 923 924 def _parse_file_location(self) -> t.Optional[exp.Expression]: 925 # Parse either a subquery or a staged file 926 return ( 927 self._parse_select(table=True, parse_subquery_alias=False) 928 if self._match(TokenType.L_PAREN, advance=False) 929 else self._parse_table_parts() 930 ) 931 932 def _parse_location_path(self) -> exp.Var: 933 start = self._curr 934 self._advance_any(ignore_reserved=True) 935 936 # We avoid consuming a comma token because external tables like @foo and @bar 937 # can be joined in a query with a comma separator, as well as a closing paren 938 # in case of subqueries 939 while self._is_connected() and not self._match_set( 940 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 941 ): 942 self._advance_any(ignore_reserved=True) 943 944 return exp.var(self._find_sql(start, self._prev)) 945 946 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 947 this = super()._parse_lambda_arg() 948 949 if not this: 950 return this 951 952 typ = self._parse_types() 953 954 if typ: 955 return self.expression(exp.Cast, this=this, to=typ) 956 957 return this 958 959 def _parse_foreign_key(self) -> exp.ForeignKey: 960 # inline FK: the REFERENCES columns are implied 961 if self._match(TokenType.REFERENCES, advance=False): 962 return self.expression(exp.ForeignKey) 963 964 # out-of-line FK: explicitly names the columns 965 return super()._parse_foreign_key() 966 967 def _parse_file_format_property(self) -> exp.FileFormatProperty: 968 self._match(TokenType.EQ) 969 if self._match(TokenType.L_PAREN, advance=False): 970 expressions = self._parse_wrapped_options() 971 else: 972 expressions = [self._parse_format_name()]
973 974 return self.expression( 975 exp.FileFormatProperty, 976 expressions=expressions, 977 ) 978 979 def _parse_credentials_property(self) -> exp.CredentialsProperty: 980 return self.expression( 981 exp.CredentialsProperty, 982 expressions=self._parse_wrapped_options(), 983 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
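To see this parser's Snowflake-specific behavior end to end, it can be driven through sqlglot's top-level API. A minimal sketch (the stage, file format, and database names below are hypothetical):

    import sqlglot
    from sqlglot import exp

    # Staged-file references are handled by _parse_table_parts; the stage and
    # FILE_FORMAT names here are made up for illustration.
    ast = sqlglot.parse_one(
        "SELECT * FROM @my_stage (FILE_FORMAT => 'my_csv_format', PATTERN => '.*[.]csv')",
        read="snowflake",
    )
    print(ast.sql(dialect="snowflake"))

    # SHOW statements are parsed into a structured exp.Show node by
    # _parse_show_snowflake rather than being kept as an opaque command.
    show = sqlglot.parse_one("SHOW TERSE SCHEMAS IN DATABASE my_db", read="snowflake")
    print(isinstance(show, exp.Show))  # expected: True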
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
985 class Tokenizer(tokens.Tokenizer): 986 STRING_ESCAPES = ["\\", "'"] 987 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 988 RAW_STRINGS = ["$$"] 989 COMMENTS = ["--", "//", ("/*", "*/")] 990 NESTED_COMMENTS = False 991 992 KEYWORDS = { 993 **tokens.Tokenizer.KEYWORDS, 994 "FILE://": TokenType.URI_START, 995 "BYTEINT": TokenType.INT, 996 "EXCLUDE": TokenType.EXCEPT, 997 "FILE FORMAT": TokenType.FILE_FORMAT, 998 "GET": TokenType.GET, 999 "ILIKE ANY": TokenType.ILIKE_ANY, 1000 "LIKE ANY": TokenType.LIKE_ANY, 1001 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1002 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1003 "MINUS": TokenType.EXCEPT, 1004 "NCHAR VARYING": TokenType.VARCHAR, 1005 "PUT": TokenType.PUT, 1006 "REMOVE": TokenType.COMMAND, 1007 "RM": TokenType.COMMAND, 1008 "SAMPLE": TokenType.TABLE_SAMPLE, 1009 "SQL_DOUBLE": TokenType.DOUBLE, 1010 "SQL_VARCHAR": TokenType.VARCHAR, 1011 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1012 "TAG": TokenType.TAG, 1013 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1014 "TOP": TokenType.TOP, 1015 "WAREHOUSE": TokenType.WAREHOUSE, 1016 "STAGE": TokenType.STAGE, 1017 "STREAMLIT": TokenType.STREAMLIT, 1018 } 1019 KEYWORDS.pop("/*+") 1020 1021 SINGLE_TOKENS = { 1022 **tokens.Tokenizer.SINGLE_TOKENS, 1023 "$": TokenType.PARAMETER, 1024 } 1025 1026 VAR_SINGLE_TOKENS = {"$"} 1027 1028 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
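As a small sketch of the tokenizer overrides above (e.g. "//" line comments and MINUS behaving like EXCEPT), the dialect can be asked to tokenize a string directly; the query text is arbitrary:

    from sqlglot.dialects.snowflake import Snowflake

    # "//" opens a line comment and MINUS is mapped to the EXCEPT token,
    # per the COMMENTS and KEYWORDS overrides above.
    tokens = Snowflake().tokenize("SELECT 1 MINUS SELECT 2 // set difference")
    print([(token.token_type.name, token.text) for token in tokens])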
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1030 class Generator(generator.Generator): 1031 PARAMETER_TOKEN = "$" 1032 MATCHED_BY_SOURCE = False 1033 SINGLE_STRING_INTERVAL = True 1034 JOIN_HINTS = False 1035 TABLE_HINTS = False 1036 QUERY_HINTS = False 1037 AGGREGATE_FILTER_SUPPORTED = False 1038 SUPPORTS_TABLE_COPY = False 1039 COLLATE_IS_FUNC = True 1040 LIMIT_ONLY_LITERALS = True 1041 JSON_KEY_VALUE_PAIR_SEP = "," 1042 INSERT_OVERWRITE = " OVERWRITE INTO" 1043 STRUCT_DELIMITER = ("(", ")") 1044 COPY_PARAMS_ARE_WRAPPED = False 1045 COPY_PARAMS_EQ_REQUIRED = True 1046 STAR_EXCEPT = "EXCLUDE" 1047 SUPPORTS_EXPLODING_PROJECTIONS = False 1048 ARRAY_CONCAT_IS_VAR_LEN = False 1049 SUPPORTS_CONVERT_TIMEZONE = True 1050 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1051 SUPPORTS_MEDIAN = True 1052 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1053 1054 TRANSFORMS = { 1055 **generator.Generator.TRANSFORMS, 1056 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1057 exp.ArgMax: rename_func("MAX_BY"), 1058 exp.ArgMin: rename_func("MIN_BY"), 1059 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1060 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1061 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1062 exp.AtTimeZone: lambda self, e: self.func( 1063 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1064 ), 1065 exp.BitwiseOr: rename_func("BITOR"), 1066 exp.BitwiseXor: rename_func("BITXOR"), 1067 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1068 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1069 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1070 exp.DateAdd: date_delta_sql("DATEADD"), 1071 exp.DateDiff: date_delta_sql("DATEDIFF"), 1072 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1073 exp.DatetimeDiff: timestampdiff_sql, 1074 exp.DateStrToDate: datestrtodate_sql, 1075 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1076 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1077 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1078 exp.DayOfYear: rename_func("DAYOFYEAR"), 1079 exp.Explode: rename_func("FLATTEN"), 1080 exp.Extract: lambda self, e: self.func( 1081 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1082 ), 1083 exp.FileFormatProperty: lambda self, 1084 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1085 exp.FromTimeZone: lambda self, e: self.func( 1086 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1087 ), 1088 exp.GenerateSeries: lambda self, e: self.func( 1089 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1090 ), 1091 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1092 exp.If: if_sql(name="IFF", false_value="NULL"), 1093 exp.JSONExtractArray: _json_extract_value_array_sql, 1094 exp.JSONExtractScalar: lambda self, e: self.func( 1095 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1096 ), 1097 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1098 exp.JSONPathRoot: lambda *_: "", 1099 exp.JSONValueArray: _json_extract_value_array_sql, 1100 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1101 rename_func("EDITDISTANCE") 1102 ), 1103 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1104 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1105 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1106 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1107 exp.MakeInterval: no_make_interval_sql, 1108 exp.Max: max_or_greatest, 1109 exp.Min: min_or_least, 1110 
exp.ParseJSON: lambda self, e: self.func( 1111 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1112 ), 1113 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1114 exp.PercentileCont: transforms.preprocess( 1115 [transforms.add_within_group_for_percentiles] 1116 ), 1117 exp.PercentileDisc: transforms.preprocess( 1118 [transforms.add_within_group_for_percentiles] 1119 ), 1120 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1121 exp.RegexpExtract: _regexpextract_sql, 1122 exp.RegexpExtractAll: _regexpextract_sql, 1123 exp.RegexpILike: _regexpilike_sql, 1124 exp.Rand: rename_func("RANDOM"), 1125 exp.Select: transforms.preprocess( 1126 [ 1127 transforms.eliminate_window_clause, 1128 transforms.eliminate_distinct_on, 1129 transforms.explode_projection_to_unnest(), 1130 transforms.eliminate_semi_and_anti_joins, 1131 _transform_generate_date_array, 1132 _eliminate_dot_variant_lookup, 1133 ] 1134 ), 1135 exp.SHA: rename_func("SHA1"), 1136 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1137 exp.StartsWith: rename_func("STARTSWITH"), 1138 exp.EndsWith: rename_func("ENDSWITH"), 1139 exp.StrPosition: lambda self, e: strposition_sql( 1140 self, e, func_name="CHARINDEX", supports_position=True 1141 ), 1142 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1143 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1144 exp.Stuff: rename_func("INSERT"), 1145 exp.StPoint: rename_func("ST_MAKEPOINT"), 1146 exp.TimeAdd: date_delta_sql("TIMEADD"), 1147 exp.Timestamp: no_timestamp_sql, 1148 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1149 exp.TimestampDiff: lambda self, e: self.func( 1150 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1151 ), 1152 exp.TimestampTrunc: timestamptrunc_sql(), 1153 exp.TimeStrToTime: timestrtotime_sql, 1154 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1155 exp.ToArray: rename_func("TO_ARRAY"), 1156 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1157 exp.ToDouble: rename_func("TO_DOUBLE"), 1158 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1159 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1160 exp.TsOrDsToDate: lambda self, e: self.func( 1161 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1162 ), 1163 exp.TsOrDsToTime: lambda self, e: self.func( 1164 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1165 ), 1166 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1167 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1168 exp.Uuid: rename_func("UUID_STRING"), 1169 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1170 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1171 exp.Xor: rename_func("BOOLXOR"), 1172 } 1173 1174 SUPPORTED_JSON_PATH_PARTS = { 1175 exp.JSONPathKey, 1176 exp.JSONPathRoot, 1177 exp.JSONPathSubscript, 1178 } 1179 1180 TYPE_MAPPING = { 1181 **generator.Generator.TYPE_MAPPING, 1182 exp.DataType.Type.NESTED: "OBJECT", 1183 exp.DataType.Type.STRUCT: "OBJECT", 1184 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1185 } 1186 1187 TOKEN_MAPPING = { 1188 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1189 } 1190 1191 PROPERTIES_LOCATION = { 1192 **generator.Generator.PROPERTIES_LOCATION, 1193 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1194 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1195 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1196 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1197 exp.VolatileProperty: 
exp.Properties.Location.UNSUPPORTED, 1198 } 1199 1200 UNSUPPORTED_VALUES_EXPRESSIONS = { 1201 exp.Map, 1202 exp.StarMap, 1203 exp.Struct, 1204 exp.VarMap, 1205 } 1206 1207 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1208 1209 def with_properties(self, properties: exp.Properties) -> str: 1210 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1211 1212 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1213 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1214 values_as_table = False 1215 1216 return super().values_sql(expression, values_as_table=values_as_table) 1217 1218 def datatype_sql(self, expression: exp.DataType) -> str: 1219 expressions = expression.expressions 1220 if ( 1221 expressions 1222 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1223 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1224 ): 1225 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1226 return "OBJECT" 1227 1228 return super().datatype_sql(expression) 1229 1230 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1231 return self.func( 1232 "TO_NUMBER", 1233 expression.this, 1234 expression.args.get("format"), 1235 expression.args.get("precision"), 1236 expression.args.get("scale"), 1237 ) 1238 1239 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1240 milli = expression.args.get("milli") 1241 if milli is not None: 1242 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1243 expression.set("nano", milli_to_nano) 1244 1245 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1246 1247 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1248 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1249 return self.func("TO_GEOGRAPHY", expression.this) 1250 if expression.is_type(exp.DataType.Type.GEOMETRY): 1251 return self.func("TO_GEOMETRY", expression.this) 1252 1253 return super().cast_sql(expression, safe_prefix=safe_prefix) 1254 1255 def trycast_sql(self, expression: exp.TryCast) -> str: 1256 value = expression.this 1257 1258 if value.type is None: 1259 from sqlglot.optimizer.annotate_types import annotate_types 1260 1261 value = annotate_types(value, dialect=self.dialect) 1262 1263 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1264 return super().trycast_sql(expression) 1265 1266 # TRY_CAST only works for string values in Snowflake 1267 return self.cast_sql(expression) 1268 1269 def log_sql(self, expression: exp.Log) -> str: 1270 if not expression.expression: 1271 return self.func("LN", expression.this) 1272 1273 return super().log_sql(expression) 1274 1275 def unnest_sql(self, expression: exp.Unnest) -> str: 1276 unnest_alias = expression.args.get("alias") 1277 offset = expression.args.get("offset") 1278 1279 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1280 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1281 1282 columns = [ 1283 exp.to_identifier("seq"), 1284 exp.to_identifier("key"), 1285 exp.to_identifier("path"), 1286 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1287 value, 1288 exp.to_identifier("this"), 1289 ] 1290 1291 if unnest_alias: 1292 unnest_alias.set("columns", columns) 1293 else: 1294 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1295 1296 table_input = self.sql(expression.expressions[0]) 1297 if not table_input.startswith("INPUT =>"): 1298 table_input 
= f"INPUT => {table_input}" 1299 1300 explode = f"TABLE(FLATTEN({table_input}))" 1301 alias = self.sql(unnest_alias) 1302 alias = f" AS {alias}" if alias else "" 1303 value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM " 1304 1305 return f"{value}{explode}{alias}" 1306 1307 def show_sql(self, expression: exp.Show) -> str: 1308 terse = "TERSE " if expression.args.get("terse") else "" 1309 history = " HISTORY" if expression.args.get("history") else "" 1310 like = self.sql(expression, "like") 1311 like = f" LIKE {like}" if like else "" 1312 1313 scope = self.sql(expression, "scope") 1314 scope = f" {scope}" if scope else "" 1315 1316 scope_kind = self.sql(expression, "scope_kind") 1317 if scope_kind: 1318 scope_kind = f" IN {scope_kind}" 1319 1320 starts_with = self.sql(expression, "starts_with") 1321 if starts_with: 1322 starts_with = f" STARTS WITH {starts_with}" 1323 1324 limit = self.sql(expression, "limit") 1325 1326 from_ = self.sql(expression, "from") 1327 if from_: 1328 from_ = f" FROM {from_}" 1329 1330 privileges = self.expressions(expression, key="privileges", flat=True) 1331 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1332 1333 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1334 1335 def describe_sql(self, expression: exp.Describe) -> str: 1336 # Default to table if kind is unknown 1337 kind_value = expression.args.get("kind") or "TABLE" 1338 kind = f" {kind_value}" if kind_value else "" 1339 this = f" {self.sql(expression, 'this')}" 1340 expressions = self.expressions(expression, flat=True) 1341 expressions = f" {expressions}" if expressions else "" 1342 return f"DESCRIBE{kind}{this}{expressions}" 1343 1344 def generatedasidentitycolumnconstraint_sql( 1345 self, expression: exp.GeneratedAsIdentityColumnConstraint 1346 ) -> str: 1347 start = expression.args.get("start") 1348 start = f" START {start}" if start else "" 1349 increment = expression.args.get("increment") 1350 increment = f" INCREMENT {increment}" if increment else "" 1351 1352 order = expression.args.get("order") 1353 if order is not None: 1354 order_clause = " ORDER" if order else " NOORDER" 1355 else: 1356 order_clause = "" 1357 1358 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1359 1360 def cluster_sql(self, expression: exp.Cluster) -> str: 1361 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1362 1363 def struct_sql(self, expression: exp.Struct) -> str: 1364 keys = [] 1365 values = [] 1366 1367 for i, e in enumerate(expression.expressions): 1368 if isinstance(e, exp.PropertyEQ): 1369 keys.append( 1370 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1371 ) 1372 values.append(e.expression) 1373 else: 1374 keys.append(exp.Literal.string(f"_{i}")) 1375 values.append(e) 1376 1377 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1378 1379 @unsupported_args("weight", "accuracy") 1380 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1381 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1382 1383 def alterset_sql(self, expression: exp.AlterSet) -> str: 1384 exprs = self.expressions(expression, flat=True) 1385 exprs = f" {exprs}" if exprs else "" 1386 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1387 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1388 copy_options = self.expressions(expression, 
key="copy_options", flat=True, sep=" ") 1389 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1390 tag = self.expressions(expression, key="tag", flat=True) 1391 tag = f" TAG {tag}" if tag else "" 1392 1393 return f"SET{exprs}{file_format}{copy_options}{tag}" 1394 1395 def strtotime_sql(self, expression: exp.StrToTime): 1396 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1397 return self.func( 1398 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1399 ) 1400 1401 def timestampsub_sql(self, expression: exp.TimestampSub): 1402 return self.sql( 1403 exp.TimestampAdd( 1404 this=expression.this, 1405 expression=expression.expression * -1, 1406 unit=expression.unit, 1407 ) 1408 ) 1409 1410 def jsonextract_sql(self, expression: exp.JSONExtract): 1411 this = expression.this 1412 1413 # JSON strings are valid coming from other dialects such as BQ 1414 return self.func( 1415 "GET_PATH", 1416 exp.ParseJSON(this=this) if this.is_string else this, 1417 expression.expression, 1418 ) 1419 1420 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1421 this = expression.this 1422 if this.is_string: 1423 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1424 1425 return self.func("TO_CHAR", this, self.format_time(expression)) 1426 1427 def datesub_sql(self, expression: exp.DateSub) -> str: 1428 value = expression.expression 1429 if value: 1430 value.replace(value * (-1)) 1431 else: 1432 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1433 1434 return date_delta_sql("DATEADD")(self, expression) 1435 1436 def select_sql(self, expression: exp.Select) -> str: 1437 limit = expression.args.get("limit") 1438 offset = expression.args.get("offset") 1439 if offset and not limit: 1440 expression.limit(exp.Null(), copy=False) 1441 return super().select_sql(expression) 1442 1443 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1444 is_materialized = expression.find(exp.MaterializedProperty) 1445 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1446 1447 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1448 # For materialized views, COPY GRANTS is located *before* the columns list 1449 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1450 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1451 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1452 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1453 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1454 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1455 1456 this_name = self.sql(expression.this, "this") 1457 copy_grants = self.sql(copy_grants_property) 1458 this_schema = self.schema_columns_sql(expression.this) 1459 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1460 1461 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1462 1463 return super().createable_sql(expression, locations) 1464 1465 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1466 this = expression.this 1467 1468 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1469 # and add it later as part of the WITHIN GROUP clause 1470 order = this if isinstance(this, exp.Order) else None 1471 if order: 1472 expression.set("this", 
order.this.pop()) 1473 1474 expr_sql = super().arrayagg_sql(expression) 1475 1476 if order: 1477 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1478 1479 return expr_sql 1480 1481 def array_sql(self, expression: exp.Array) -> str: 1482 expressions = expression.expressions 1483 1484 first_expr = seq_get(expressions, 0) 1485 if isinstance(first_expr, exp.Select): 1486 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1487 if first_expr.text("kind").upper() == "STRUCT": 1488 object_construct_args = [] 1489 for expr in first_expr.expressions: 1490 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1491 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1492 name = expr.this if isinstance(expr, exp.Alias) else expr 1493 1494 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1495 1496 array_agg = exp.ArrayAgg( 1497 this=_build_object_construct(args=object_construct_args) 1498 ) 1499 1500 first_expr.set("kind", None) 1501 first_expr.set("expressions", [array_agg]) 1502 1503 return self.sql(first_expr.subquery()) 1504 1505 return inline_array_sql(self, expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
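As a quick illustration of the generator (a minimal sketch; the table and column names are arbitrary), note how the cast_sql override above surfaces through the public API, and how pretty/identify are forwarded straight down to the generator by Expression.sql:

    import sqlglot

    # cast_sql rewrites casts to GEOGRAPHY/GEOMETRY as TO_GEOGRAPHY/TO_GEOMETRY.
    print(sqlglot.transpile("SELECT CAST(c AS GEOGRAPHY) FROM t", read="snowflake", write="snowflake")[0])
    # expected: SELECT TO_GEOGRAPHY(c) FROM t

    # pretty and identify are ordinary generator options.
    print(
        sqlglot.parse_one("SELECT a, b FROM t", read="snowflake").sql(
            dialect="snowflake", pretty=True, identify=True
        )
    )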
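One more sketch, this time exercising unnest_sql: transpiling a Postgres-style UNNEST into Snowflake's TABLE(FLATTEN(INPUT => ...)) form. The input query is arbitrary, and the expected output below is indicative rather than exact; the alias padding comes from the columns list built in unnest_sql:

    import sqlglot

    sql = sqlglot.transpile(
        "SELECT col FROM UNNEST(ARRAY[1, 2, 3]) AS t(col)",
        read="postgres",
        write="snowflake",
    )[0]
    print(sql)
    # expected shape: SELECT col FROM TABLE(FLATTEN(INPUT => [1, 2, 3]))
    #                 AS t(seq, key, path, index, col, this)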
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql