sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_safe_divide_sql,
    no_timestamp_sql,
    timestampdiff_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value and not safe:
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))

                if not is_float(value.this):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)
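
# Illustrative sketch, not part of the module: DIV0 parses into an exp.If so it can
# be transpiled like any other conditional; the output below is approximate and may
# vary by sqlglot version.
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT DIV0(a, b)", read="snowflake").sql("snowflake")
#   ... # roughly: SELECT IFF(b = 0 AND NOT a IS NULL, 0, a / b)
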
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression
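
# Illustrative sketch, not part of the module: without an ICEBERG property, nested
# column types lose their element types, since plain Snowflake tables only accept
# bare ARRAY/OBJECT/MAP. Approximate, version-dependent output:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("CREATE TABLE t (c ARRAY(INT))", read="snowflake", write="snowflake")
#   ... # roughly: ['CREATE TABLE t (c ARRAY)']
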
def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH),
            # the transformed Snowflake query is the following (it'll be unnested properly on the
            # next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
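
# Illustrative sketch, not part of the module: a non-default `group` forces all of
# the earlier defaults through the chain above. Approximate output:
#
#   >>> from sqlglot import exp
#   >>> node = exp.RegexpExtract(
#   ...     this=exp.column("s"),
#   ...     expression=exp.Literal.string("a(b)"),
#   ...     group=exp.Literal.number(1),
#   ... )
#   >>> node.sql("snowflake")
#   ... # roughly: REGEXP_SUBSTR(s, 'a(b)', 1, 1, 'c', 1)
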
exp.ParseJSON(this=f"TO_JSON({ident})") 306 307 transform_lambda = exp.Lambda(expressions=[ident], this=this) 308 309 return self.func("TRANSFORM", json_extract, transform_lambda) 310 311 312class Snowflake(Dialect): 313 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 314 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 315 NULL_ORDERING = "nulls_are_large" 316 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 317 SUPPORTS_USER_DEFINED_TYPES = False 318 SUPPORTS_SEMI_ANTI_JOIN = False 319 PREFER_CTE_ALIAS_COLUMN = True 320 TABLESAMPLE_SIZE_IS_PERCENT = True 321 COPY_PARAMS_ARE_CSV = False 322 ARRAY_AGG_INCLUDES_NULLS = None 323 324 TIME_MAPPING = { 325 "YYYY": "%Y", 326 "yyyy": "%Y", 327 "YY": "%y", 328 "yy": "%y", 329 "MMMM": "%B", 330 "mmmm": "%B", 331 "MON": "%b", 332 "mon": "%b", 333 "MM": "%m", 334 "mm": "%m", 335 "DD": "%d", 336 "dd": "%-d", 337 "DY": "%a", 338 "dy": "%w", 339 "HH24": "%H", 340 "hh24": "%H", 341 "HH12": "%I", 342 "hh12": "%I", 343 "MI": "%M", 344 "mi": "%M", 345 "SS": "%S", 346 "ss": "%S", 347 "FF": "%f", 348 "ff": "%f", 349 "FF6": "%f", 350 "ff6": "%f", 351 } 352 353 def quote_identifier(self, expression: E, identify: bool = True) -> E: 354 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 355 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 356 if ( 357 isinstance(expression, exp.Identifier) 358 and isinstance(expression.parent, exp.Table) 359 and expression.name.lower() == "dual" 360 ): 361 return expression # type: ignore 362 363 return super().quote_identifier(expression, identify=identify) 364 365 class Parser(parser.Parser): 366 IDENTIFY_PIVOT_STRINGS = True 367 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 368 COLON_IS_VARIANT_EXTRACT = True 369 370 ID_VAR_TOKENS = { 371 *parser.Parser.ID_VAR_TOKENS, 372 TokenType.MATCH_CONDITION, 373 } 374 375 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 376 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 377 378 FUNCTIONS = { 379 **parser.Parser.FUNCTIONS, 380 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 381 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 382 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 383 this=seq_get(args, 1), expression=seq_get(args, 0) 384 ), 385 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 386 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 387 start=seq_get(args, 0), 388 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 389 step=seq_get(args, 2), 390 ), 391 "BITXOR": binary_from_function(exp.BitwiseXor), 392 "BIT_XOR": binary_from_function(exp.BitwiseXor), 393 "BOOLXOR": binary_from_function(exp.Xor), 394 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 395 "DATE_TRUNC": _date_trunc_to_time, 396 "DATEADD": _build_date_time_add(exp.DateAdd), 397 "DATEDIFF": _build_datediff, 398 "DIV0": _build_if_from_div0, 399 "EDITDISTANCE": lambda args: exp.Levenshtein( 400 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 401 ), 402 "FLATTEN": exp.Explode.from_arg_list, 403 "GET_PATH": lambda args, dialect: exp.JSONExtract( 404 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 405 ), 406 "IFF": exp.If.from_arg_list, 407 "LAST_DAY": lambda args: exp.LastDay( 408 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 409 ), 410 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 411 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), 
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
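
        # Illustrative sketch, not part of the module: Snowflake's ARRAY_CONTAINS takes
        # (value, array), the reverse of exp.ArrayContains, so the builder above and the
        # generator transform further down both swap the arguments, keeping round trips
        # stable. Approximate output:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT ARRAY_CONTAINS(1, arr)", read="snowflake").sql("snowflake")
        #   ... # roughly: SELECT ARRAY_CONTAINS(1, arr)
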
_show_parser("VIEWS"), 500 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 501 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 502 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 503 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 504 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 505 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 506 "SEQUENCES": _show_parser("SEQUENCES"), 507 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 508 "COLUMNS": _show_parser("COLUMNS"), 509 "USERS": _show_parser("USERS"), 510 "TERSE USERS": _show_parser("USERS"), 511 } 512 513 CONSTRAINT_PARSERS = { 514 **parser.Parser.CONSTRAINT_PARSERS, 515 "WITH": lambda self: self._parse_with_constraint(), 516 "MASKING": lambda self: self._parse_with_constraint(), 517 "PROJECTION": lambda self: self._parse_with_constraint(), 518 "TAG": lambda self: self._parse_with_constraint(), 519 } 520 521 STAGED_FILE_SINGLE_TOKENS = { 522 TokenType.DOT, 523 TokenType.MOD, 524 TokenType.SLASH, 525 } 526 527 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 528 529 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 530 531 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 532 533 LAMBDAS = { 534 **parser.Parser.LAMBDAS, 535 TokenType.ARROW: lambda self, expressions: self.expression( 536 exp.Lambda, 537 this=self._replace_lambda( 538 self._parse_assignment(), 539 expressions, 540 ), 541 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 542 ), 543 } 544 545 def _negate_range( 546 self, this: t.Optional[exp.Expression] = None 547 ) -> t.Optional[exp.Expression]: 548 if not this: 549 return this 550 551 query = this.args.get("query") 552 if isinstance(this, exp.In) and isinstance(query, exp.Query): 553 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 554 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 555 # which can produce different results (most likely a SnowFlake bug). 
        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`,
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
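
        # Illustrative sketch, not part of the module: epoch date parts become a
        # unix-time expression scaled to the requested precision. Approximate output:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT DATE_PART(epoch_millisecond, col)", read="snowflake").sql("duckdb")
        #   ... # roughly: SELECT EPOCH(CAST(col AS TIMESTAMP)) * 1000
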
        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )
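
        # Illustrative sketch, not part of the module: SHOW parses into a structured
        # exp.Show node instead of an opaque command, so its parts stay addressable.
        # Approximate output:
        #
        #   >>> import sqlglot
        #   >>> show = sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db.s", read="snowflake")
        #   >>> show.args.get("scope_kind"), show.args.get("terse")
        #   ... # roughly: ('SCHEMA', True)
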
        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator; the closing paren is left
            # unconsumed as well, for the subquery case
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
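
    # Illustrative sketch, not part of the module: stage references parse as table
    # parts, so staged-file queries survive a round trip. Approximate output:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT $1 FROM @mystage/file.csv", read="snowflake").sql("snowflake")
    #   ... # roughly: SELECT $1 FROM @mystage/file.csv
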
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }
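
        # Illustrative sketch, not part of the module: TRANSFORMS drives most
        # expression-level transpilation, e.g. T-SQL's STUFF maps to Snowflake's
        # INSERT via the exp.Stuff entry above. Approximate output:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT STUFF(s, 1, 2, 'ab')", read="tsql", write="snowflake")
        #   ... # roughly: ["SELECT INSERT(s, 1, 2, 'ab')"]
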
e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 949 ), 950 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 951 exp.Uuid: rename_func("UUID_STRING"), 952 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 953 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 954 exp.Xor: rename_func("BOOLXOR"), 955 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 956 rename_func("EDITDISTANCE") 957 ), 958 } 959 960 SUPPORTED_JSON_PATH_PARTS = { 961 exp.JSONPathKey, 962 exp.JSONPathRoot, 963 exp.JSONPathSubscript, 964 } 965 966 TYPE_MAPPING = { 967 **generator.Generator.TYPE_MAPPING, 968 exp.DataType.Type.NESTED: "OBJECT", 969 exp.DataType.Type.STRUCT: "OBJECT", 970 } 971 972 PROPERTIES_LOCATION = { 973 **generator.Generator.PROPERTIES_LOCATION, 974 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 975 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 976 } 977 978 UNSUPPORTED_VALUES_EXPRESSIONS = { 979 exp.Map, 980 exp.StarMap, 981 exp.Struct, 982 exp.VarMap, 983 } 984 985 def with_properties(self, properties: exp.Properties) -> str: 986 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 987 988 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 989 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 990 values_as_table = False 991 992 return super().values_sql(expression, values_as_table=values_as_table) 993 994 def datatype_sql(self, expression: exp.DataType) -> str: 995 expressions = expression.expressions 996 if ( 997 expressions 998 and expression.is_type(*exp.DataType.STRUCT_TYPES) 999 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1000 ): 1001 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1002 return "OBJECT" 1003 1004 return super().datatype_sql(expression) 1005 1006 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1007 return self.func( 1008 "TO_NUMBER", 1009 expression.this, 1010 expression.args.get("format"), 1011 expression.args.get("precision"), 1012 expression.args.get("scale"), 1013 ) 1014 1015 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1016 milli = expression.args.get("milli") 1017 if milli is not None: 1018 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1019 expression.set("nano", milli_to_nano) 1020 1021 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1022 1023 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1024 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1025 return self.func("TO_GEOGRAPHY", expression.this) 1026 if expression.is_type(exp.DataType.Type.GEOMETRY): 1027 return self.func("TO_GEOMETRY", expression.this) 1028 1029 return super().cast_sql(expression, safe_prefix=safe_prefix) 1030 1031 def trycast_sql(self, expression: exp.TryCast) -> str: 1032 value = expression.this 1033 1034 if value.type is None: 1035 from sqlglot.optimizer.annotate_types import annotate_types 1036 1037 value = annotate_types(value) 1038 1039 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1040 return super().trycast_sql(expression) 1041 1042 # TRY_CAST only works for string values in Snowflake 1043 return self.cast_sql(expression) 1044 1045 def log_sql(self, expression: exp.Log) -> str: 1046 if not expression.expression: 1047 return self.func("LN", expression.this) 1048 1049 return super().log_sql(expression) 1050 1051 def unnest_sql(self, expression: exp.Unnest) -> str: 1052 unnest_alias = 
expression.args.get("alias") 1053 offset = expression.args.get("offset") 1054 1055 columns = [ 1056 exp.to_identifier("seq"), 1057 exp.to_identifier("key"), 1058 exp.to_identifier("path"), 1059 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1060 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1061 or exp.to_identifier("value"), 1062 exp.to_identifier("this"), 1063 ] 1064 1065 if unnest_alias: 1066 unnest_alias.set("columns", columns) 1067 else: 1068 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1069 1070 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1071 alias = self.sql(unnest_alias) 1072 alias = f" AS {alias}" if alias else "" 1073 return f"{explode}{alias}" 1074 1075 def show_sql(self, expression: exp.Show) -> str: 1076 terse = "TERSE " if expression.args.get("terse") else "" 1077 history = " HISTORY" if expression.args.get("history") else "" 1078 like = self.sql(expression, "like") 1079 like = f" LIKE {like}" if like else "" 1080 1081 scope = self.sql(expression, "scope") 1082 scope = f" {scope}" if scope else "" 1083 1084 scope_kind = self.sql(expression, "scope_kind") 1085 if scope_kind: 1086 scope_kind = f" IN {scope_kind}" 1087 1088 starts_with = self.sql(expression, "starts_with") 1089 if starts_with: 1090 starts_with = f" STARTS WITH {starts_with}" 1091 1092 limit = self.sql(expression, "limit") 1093 1094 from_ = self.sql(expression, "from") 1095 if from_: 1096 from_ = f" FROM {from_}" 1097 1098 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1099 1100 def describe_sql(self, expression: exp.Describe) -> str: 1101 # Default to table if kind is unknown 1102 kind_value = expression.args.get("kind") or "TABLE" 1103 kind = f" {kind_value}" if kind_value else "" 1104 this = f" {self.sql(expression, 'this')}" 1105 expressions = self.expressions(expression, flat=True) 1106 expressions = f" {expressions}" if expressions else "" 1107 return f"DESCRIBE{kind}{this}{expressions}" 1108 1109 def generatedasidentitycolumnconstraint_sql( 1110 self, expression: exp.GeneratedAsIdentityColumnConstraint 1111 ) -> str: 1112 start = expression.args.get("start") 1113 start = f" START {start}" if start else "" 1114 increment = expression.args.get("increment") 1115 increment = f" INCREMENT {increment}" if increment else "" 1116 return f"AUTOINCREMENT{start}{increment}" 1117 1118 def cluster_sql(self, expression: exp.Cluster) -> str: 1119 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1120 1121 def struct_sql(self, expression: exp.Struct) -> str: 1122 keys = [] 1123 values = [] 1124 1125 for i, e in enumerate(expression.expressions): 1126 if isinstance(e, exp.PropertyEQ): 1127 keys.append( 1128 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1129 ) 1130 values.append(e.expression) 1131 else: 1132 keys.append(exp.Literal.string(f"_{i}")) 1133 values.append(e) 1134 1135 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1136 1137 @unsupported_args("weight", "accuracy") 1138 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1139 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1140 1141 def alterset_sql(self, expression: exp.AlterSet) -> str: 1142 exprs = self.expressions(expression, flat=True) 1143 exprs = f" {exprs}" if exprs else "" 1144 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1145 file_format = f" 
        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))
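
        # Illustrative sketch, not part of the module: JSON extraction from other
        # dialects lands on GET_PATH, with string operands wrapped in PARSE_JSON first.
        # Approximate output:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT col -> '$.k' FROM t", read="duckdb", write="snowflake")
        #   ... # roughly: SELECT GET_PATH(col, 'k') FROM t
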
e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 950 ), 951 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 952 exp.Uuid: rename_func("UUID_STRING"), 953 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 954 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 955 exp.Xor: rename_func("BOOLXOR"), 956 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 957 rename_func("EDITDISTANCE") 958 ), 959 } 960 961 SUPPORTED_JSON_PATH_PARTS = { 962 exp.JSONPathKey, 963 exp.JSONPathRoot, 964 exp.JSONPathSubscript, 965 } 966 967 TYPE_MAPPING = { 968 **generator.Generator.TYPE_MAPPING, 969 exp.DataType.Type.NESTED: "OBJECT", 970 exp.DataType.Type.STRUCT: "OBJECT", 971 } 972 973 PROPERTIES_LOCATION = { 974 **generator.Generator.PROPERTIES_LOCATION, 975 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 976 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 977 } 978 979 UNSUPPORTED_VALUES_EXPRESSIONS = { 980 exp.Map, 981 exp.StarMap, 982 exp.Struct, 983 exp.VarMap, 984 } 985 986 def with_properties(self, properties: exp.Properties) -> str: 987 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 988 989 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 990 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 991 values_as_table = False 992 993 return super().values_sql(expression, values_as_table=values_as_table) 994 995 def datatype_sql(self, expression: exp.DataType) -> str: 996 expressions = expression.expressions 997 if ( 998 expressions 999 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1000 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1001 ): 1002 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1003 return "OBJECT" 1004 1005 return super().datatype_sql(expression) 1006 1007 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1008 return self.func( 1009 "TO_NUMBER", 1010 expression.this, 1011 expression.args.get("format"), 1012 expression.args.get("precision"), 1013 expression.args.get("scale"), 1014 ) 1015 1016 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1017 milli = expression.args.get("milli") 1018 if milli is not None: 1019 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1020 expression.set("nano", milli_to_nano) 1021 1022 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1023 1024 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1025 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1026 return self.func("TO_GEOGRAPHY", expression.this) 1027 if expression.is_type(exp.DataType.Type.GEOMETRY): 1028 return self.func("TO_GEOMETRY", expression.this) 1029 1030 return super().cast_sql(expression, safe_prefix=safe_prefix) 1031 1032 def trycast_sql(self, expression: exp.TryCast) -> str: 1033 value = expression.this 1034 1035 if value.type is None: 1036 from sqlglot.optimizer.annotate_types import annotate_types 1037 1038 value = annotate_types(value) 1039 1040 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1041 return super().trycast_sql(expression) 1042 1043 # TRY_CAST only works for string values in Snowflake 1044 return self.cast_sql(expression) 1045 1046 def log_sql(self, expression: exp.Log) -> str: 1047 if not expression.expression: 1048 return self.func("LN", expression.this) 1049 1050 return super().log_sql(expression) 1051 1052 def unnest_sql(self, expression: exp.Unnest) -> str: 1053 unnest_alias = 
expression.args.get("alias") 1054 offset = expression.args.get("offset") 1055 1056 columns = [ 1057 exp.to_identifier("seq"), 1058 exp.to_identifier("key"), 1059 exp.to_identifier("path"), 1060 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1061 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1062 or exp.to_identifier("value"), 1063 exp.to_identifier("this"), 1064 ] 1065 1066 if unnest_alias: 1067 unnest_alias.set("columns", columns) 1068 else: 1069 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1070 1071 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1072 alias = self.sql(unnest_alias) 1073 alias = f" AS {alias}" if alias else "" 1074 return f"{explode}{alias}" 1075 1076 def show_sql(self, expression: exp.Show) -> str: 1077 terse = "TERSE " if expression.args.get("terse") else "" 1078 history = " HISTORY" if expression.args.get("history") else "" 1079 like = self.sql(expression, "like") 1080 like = f" LIKE {like}" if like else "" 1081 1082 scope = self.sql(expression, "scope") 1083 scope = f" {scope}" if scope else "" 1084 1085 scope_kind = self.sql(expression, "scope_kind") 1086 if scope_kind: 1087 scope_kind = f" IN {scope_kind}" 1088 1089 starts_with = self.sql(expression, "starts_with") 1090 if starts_with: 1091 starts_with = f" STARTS WITH {starts_with}" 1092 1093 limit = self.sql(expression, "limit") 1094 1095 from_ = self.sql(expression, "from") 1096 if from_: 1097 from_ = f" FROM {from_}" 1098 1099 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1100 1101 def describe_sql(self, expression: exp.Describe) -> str: 1102 # Default to table if kind is unknown 1103 kind_value = expression.args.get("kind") or "TABLE" 1104 kind = f" {kind_value}" if kind_value else "" 1105 this = f" {self.sql(expression, 'this')}" 1106 expressions = self.expressions(expression, flat=True) 1107 expressions = f" {expressions}" if expressions else "" 1108 return f"DESCRIBE{kind}{this}{expressions}" 1109 1110 def generatedasidentitycolumnconstraint_sql( 1111 self, expression: exp.GeneratedAsIdentityColumnConstraint 1112 ) -> str: 1113 start = expression.args.get("start") 1114 start = f" START {start}" if start else "" 1115 increment = expression.args.get("increment") 1116 increment = f" INCREMENT {increment}" if increment else "" 1117 return f"AUTOINCREMENT{start}{increment}" 1118 1119 def cluster_sql(self, expression: exp.Cluster) -> str: 1120 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1121 1122 def struct_sql(self, expression: exp.Struct) -> str: 1123 keys = [] 1124 values = [] 1125 1126 for i, e in enumerate(expression.expressions): 1127 if isinstance(e, exp.PropertyEQ): 1128 keys.append( 1129 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1130 ) 1131 values.append(e.expression) 1132 else: 1133 keys.append(exp.Literal.string(f"_{i}")) 1134 values.append(e) 1135 1136 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1137 1138 @unsupported_args("weight", "accuracy") 1139 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1140 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1141 1142 def alterset_sql(self, expression: exp.AlterSet) -> str: 1143 exprs = self.expressions(expression, flat=True) 1144 exprs = f" {exprs}" if exprs else "" 1145 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1146 file_format = f" 
STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1147 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1148 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1149 tag = self.expressions(expression, key="tag", flat=True) 1150 tag = f" TAG {tag}" if tag else "" 1151 1152 return f"SET{exprs}{file_format}{copy_options}{tag}" 1153 1154 def strtotime_sql(self, expression: exp.StrToTime): 1155 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1156 return self.func( 1157 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1158 ) 1159 1160 def timestampsub_sql(self, expression: exp.TimestampSub): 1161 return self.sql( 1162 exp.TimestampAdd( 1163 this=expression.this, 1164 expression=expression.expression * -1, 1165 unit=expression.unit, 1166 ) 1167 ) 1168 1169 def jsonextract_sql(self, expression: exp.JSONExtract): 1170 this = expression.this 1171 1172 # JSON strings are valid coming from other dialects such as BQ 1173 return self.func( 1174 "GET_PATH", 1175 exp.ParseJSON(this=this) if this.is_string else this, 1176 expression.expression, 1177 ) 1178 1179 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1180 this = expression.this 1181 if not isinstance(this, exp.TsOrDsToTimestamp): 1182 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1183 1184 return self.func("TO_CHAR", this, self.format_time(expression))
Specifies the strategy according to which identifiers should be normalized.
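As a quick illustration (a minimal sketch, assuming a recent sqlglot install), the strategy surfaces through normalize_identifier, listed under the inherited members below; Snowflake resolves unquoted identifiers as uppercase:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

snowflake = Dialect.get_or_raise("snowflake")

# Unquoted identifiers are case-insensitive in Snowflake, so they normalize to uppercase.
print(snowflake.normalize_identifier(exp.to_identifier("my_col")).name)  # expected: MY_COL

# Quoted identifiers are case-sensitive and are expected to be left untouched.
print(snowflake.normalize_identifier(exp.to_identifier("my_col", quoted=True)).name)  # my_col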
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, the query

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
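A minimal sketch of triggering this rewrite, assuming the alias propagation is applied during qualification via sqlglot's optimizer:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# Qualifying with the Snowflake dialect should push the CTE alias column `c`
# onto the SUM(a) projection, as described above.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))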
Associates this dialect's time formats with their equivalent Python strftime formats.
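For example (a sketch; the exact output is an assumption), these mappings drive format-string conversion when transpiling to a strftime-based dialect such as DuckDB:

import sqlglot

# Snowflake's 'yyyy-mm-DD' format tokens should be rewritten to '%Y-%m-%d'
# in the DuckDB output (expected shape, not verified against every version).
print(sqlglot.transpile("SELECT TO_TIMESTAMP('2024-01-31', 'yyyy-mm-DD')", read="snowflake", write="duckdb")[0])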
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
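To see the DUAL carve-out in isolation, a minimal sketch that force-quotes every identifier through this hook (assuming quote_identifier marks identifiers in place, as in recent sqlglot versions):

from sqlglot import exp, parse_one
from sqlglot.dialects.dialect import Dialect

snowflake = Dialect.get_or_raise("snowflake")
tree = parse_one("SELECT col FROM dual", read="snowflake")

# Quote every identifier; the special-cased DUAL table is returned as-is.
for ident in tree.find_all(exp.Identifier):
    snowflake.quote_identifier(ident)

print(tree.sql("snowflake"))  # expected: SELECT "col" FROM dual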
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (the actual newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
366 class Parser(parser.Parser): 367 IDENTIFY_PIVOT_STRINGS = True 368 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 369 COLON_IS_VARIANT_EXTRACT = True 370 371 ID_VAR_TOKENS = { 372 *parser.Parser.ID_VAR_TOKENS, 373 TokenType.MATCH_CONDITION, 374 } 375 376 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 377 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 378 379 FUNCTIONS = { 380 **parser.Parser.FUNCTIONS, 381 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 382 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 383 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 384 this=seq_get(args, 1), expression=seq_get(args, 0) 385 ), 386 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 387 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 388 start=seq_get(args, 0), 389 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 390 step=seq_get(args, 2), 391 ), 392 "BITXOR": binary_from_function(exp.BitwiseXor), 393 "BIT_XOR": binary_from_function(exp.BitwiseXor), 394 "BOOLXOR": binary_from_function(exp.Xor), 395 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 396 "DATE_TRUNC": _date_trunc_to_time, 397 "DATEADD": _build_date_time_add(exp.DateAdd), 398 "DATEDIFF": _build_datediff, 399 "DIV0": _build_if_from_div0, 400 "EDITDISTANCE": lambda args: exp.Levenshtein( 401 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 402 ), 403 "FLATTEN": exp.Explode.from_arg_list, 404 "GET_PATH": lambda args, dialect: exp.JSONExtract( 405 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 406 ), 407 "IFF": exp.If.from_arg_list, 408 "LAST_DAY": lambda args: exp.LastDay( 409 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 410 ), 411 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 412 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 413 "LISTAGG": exp.GroupConcat.from_arg_list, 414 "NULLIFZERO": _build_if_from_nullifzero, 415 "OBJECT_CONSTRUCT": _build_object_construct, 416 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 417 "REGEXP_REPLACE": _build_regexp_replace, 418 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 419 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 420 "RLIKE": exp.RegexpLike.from_arg_list, 421 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 422 "TIMEADD": _build_date_time_add(exp.TimeAdd), 423 "TIMEDIFF": _build_datediff, 424 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 425 "TIMESTAMPDIFF": _build_datediff, 426 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 427 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 428 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 429 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 430 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 431 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 432 "TRY_TO_TIMESTAMP": _build_datetime( 433 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 434 ), 435 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 436 "TO_NUMBER": lambda args: exp.ToNumber( 437 this=seq_get(args, 0), 438 format=seq_get(args, 1), 439 precision=seq_get(args, 2), 440 scale=seq_get(args, 3), 441 ), 442 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 443 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 444 "TO_TIMESTAMP_LTZ": 
_build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 445 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 446 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 447 "TO_VARCHAR": exp.ToChar.from_arg_list, 448 "ZEROIFNULL": _build_if_from_zeroifnull, 449 } 450 451 FUNCTION_PARSERS = { 452 **parser.Parser.FUNCTION_PARSERS, 453 "DATE_PART": lambda self: self._parse_date_part(), 454 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 455 } 456 FUNCTION_PARSERS.pop("TRIM") 457 458 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 459 460 RANGE_PARSERS = { 461 **parser.Parser.RANGE_PARSERS, 462 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 463 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 464 } 465 466 ALTER_PARSERS = { 467 **parser.Parser.ALTER_PARSERS, 468 "UNSET": lambda self: self.expression( 469 exp.Set, 470 tag=self._match_text_seq("TAG"), 471 expressions=self._parse_csv(self._parse_id_var), 472 unset=True, 473 ), 474 } 475 476 STATEMENT_PARSERS = { 477 **parser.Parser.STATEMENT_PARSERS, 478 TokenType.SHOW: lambda self: self._parse_show(), 479 } 480 481 PROPERTY_PARSERS = { 482 **parser.Parser.PROPERTY_PARSERS, 483 "LOCATION": lambda self: self._parse_location_property(), 484 "TAG": lambda self: self._parse_tag(), 485 } 486 487 TYPE_CONVERTERS = { 488 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 489 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 490 } 491 492 SHOW_PARSERS = { 493 "SCHEMAS": _show_parser("SCHEMAS"), 494 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 495 "OBJECTS": _show_parser("OBJECTS"), 496 "TERSE OBJECTS": _show_parser("OBJECTS"), 497 "TABLES": _show_parser("TABLES"), 498 "TERSE TABLES": _show_parser("TABLES"), 499 "VIEWS": _show_parser("VIEWS"), 500 "TERSE VIEWS": _show_parser("VIEWS"), 501 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 502 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 503 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 504 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 505 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 506 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 507 "SEQUENCES": _show_parser("SEQUENCES"), 508 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 509 "COLUMNS": _show_parser("COLUMNS"), 510 "USERS": _show_parser("USERS"), 511 "TERSE USERS": _show_parser("USERS"), 512 } 513 514 CONSTRAINT_PARSERS = { 515 **parser.Parser.CONSTRAINT_PARSERS, 516 "WITH": lambda self: self._parse_with_constraint(), 517 "MASKING": lambda self: self._parse_with_constraint(), 518 "PROJECTION": lambda self: self._parse_with_constraint(), 519 "TAG": lambda self: self._parse_with_constraint(), 520 } 521 522 STAGED_FILE_SINGLE_TOKENS = { 523 TokenType.DOT, 524 TokenType.MOD, 525 TokenType.SLASH, 526 } 527 528 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 529 530 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 531 532 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 533 534 LAMBDAS = { 535 **parser.Parser.LAMBDAS, 536 TokenType.ARROW: lambda self, expressions: self.expression( 537 exp.Lambda, 538 this=self._replace_lambda( 539 self._parse_assignment(), 540 expressions, 541 ), 542 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 543 ), 544 } 545 546 def _negate_range( 547 self, this: t.Optional[exp.Expression] = None 548 ) -> 
t.Optional[exp.Expression]: 549 if not this: 550 return this 551 552 query = this.args.get("query") 553 if isinstance(this, exp.In) and isinstance(query, exp.Query): 554 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 555 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 556 # which can produce different results (most likely a SnowFlake bug). 557 # 558 # https://docs.snowflake.com/en/sql-reference/functions/in 559 # Context: https://github.com/tobymao/sqlglot/issues/3890 560 return self.expression( 561 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 562 ) 563 564 return self.expression(exp.Not, this=this) 565 566 def _parse_tag(self) -> exp.Tags: 567 return self.expression( 568 exp.Tags, 569 expressions=self._parse_wrapped_csv(self._parse_property), 570 ) 571 572 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 573 if self._prev.token_type != TokenType.WITH: 574 self._retreat(self._index - 1) 575 576 if self._match_text_seq("MASKING", "POLICY"): 577 policy = self._parse_column() 578 return self.expression( 579 exp.MaskingPolicyColumnConstraint, 580 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 581 expressions=self._match(TokenType.USING) 582 and self._parse_wrapped_csv(self._parse_id_var), 583 ) 584 if self._match_text_seq("PROJECTION", "POLICY"): 585 policy = self._parse_column() 586 return self.expression( 587 exp.ProjectionPolicyColumnConstraint, 588 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 589 ) 590 if self._match(TokenType.TAG): 591 return self._parse_tag() 592 593 return None 594 595 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 596 if self._match(TokenType.TAG): 597 return self._parse_tag() 598 599 return super()._parse_with_property() 600 601 def _parse_create(self) -> exp.Create | exp.Command: 602 expression = super()._parse_create() 603 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 604 # Replace the Table node with the enclosed Identifier 605 expression.this.replace(expression.this.this) 606 607 return expression 608 609 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 610 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 611 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 612 this = self._parse_var() or self._parse_type() 613 614 if not this: 615 return None 616 617 self._match(TokenType.COMMA) 618 expression = self._parse_bitwise() 619 this = map_date_part(this) 620 name = this.name.upper() 621 622 if name.startswith("EPOCH"): 623 if name == "EPOCH_MILLISECOND": 624 scale = 10**3 625 elif name == "EPOCH_MICROSECOND": 626 scale = 10**6 627 elif name == "EPOCH_NANOSECOND": 628 scale = 10**9 629 else: 630 scale = None 631 632 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 633 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 634 635 if scale: 636 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 637 638 return to_unix 639 640 return self.expression(exp.Extract, this=this, expression=expression) 641 642 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 643 if is_map: 644 # Keys are strings in Snowflake's objects, see also: 645 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 646 # - 
https://docs.snowflake.com/en/sql-reference/functions/object_construct 647 return self._parse_slice(self._parse_string()) 648 649 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 650 651 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 652 lateral = super()._parse_lateral() 653 if not lateral: 654 return lateral 655 656 if isinstance(lateral.this, exp.Explode): 657 table_alias = lateral.args.get("alias") 658 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 659 if table_alias and not table_alias.args.get("columns"): 660 table_alias.set("columns", columns) 661 elif not table_alias: 662 exp.alias_(lateral, "_flattened", table=columns, copy=False) 663 664 return lateral 665 666 def _parse_table_parts( 667 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 668 ) -> exp.Table: 669 # https://docs.snowflake.com/en/user-guide/querying-stage 670 if self._match(TokenType.STRING, advance=False): 671 table = self._parse_string() 672 elif self._match_text_seq("@", advance=False): 673 table = self._parse_location_path() 674 else: 675 table = None 676 677 if table: 678 file_format = None 679 pattern = None 680 681 wrapped = self._match(TokenType.L_PAREN) 682 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 683 if self._match_text_seq("FILE_FORMAT", "=>"): 684 file_format = self._parse_string() or super()._parse_table_parts( 685 is_db_reference=is_db_reference 686 ) 687 elif self._match_text_seq("PATTERN", "=>"): 688 pattern = self._parse_string() 689 else: 690 break 691 692 self._match(TokenType.COMMA) 693 694 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 695 else: 696 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 697 698 return table 699 700 def _parse_id_var( 701 self, 702 any_token: bool = True, 703 tokens: t.Optional[t.Collection[TokenType]] = None, 704 ) -> t.Optional[exp.Expression]: 705 if self._match_text_seq("IDENTIFIER", "("): 706 identifier = ( 707 super()._parse_id_var(any_token=any_token, tokens=tokens) 708 or self._parse_string() 709 ) 710 self._match_r_paren() 711 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 712 713 return super()._parse_id_var(any_token=any_token, tokens=tokens) 714 715 def _parse_show_snowflake(self, this: str) -> exp.Show: 716 scope = None 717 scope_kind = None 718 719 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 720 # which is syntactically valid but has no effect on the output 721 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 722 723 history = self._match_text_seq("HISTORY") 724 725 like = self._parse_string() if self._match(TokenType.LIKE) else None 726 727 if self._match(TokenType.IN): 728 if self._match_text_seq("ACCOUNT"): 729 scope_kind = "ACCOUNT" 730 elif self._match_set(self.DB_CREATABLES): 731 scope_kind = self._prev.text.upper() 732 if self._curr: 733 scope = self._parse_table_parts() 734 elif self._curr: 735 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 736 scope = self._parse_table_parts() 737 738 return self.expression( 739 exp.Show, 740 **{ 741 "terse": terse, 742 "this": this, 743 "history": history, 744 "like": like, 745 "scope": scope, 746 "scope_kind": scope_kind, 747 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 748 "limit": self._parse_limit(), 749 "from": self._parse_string() if self._match(TokenType.FROM) else None, 750 }, 751 ) 752 753 def 
_parse_location_property(self) -> exp.LocationProperty: 754 self._match(TokenType.EQ) 755 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 756 757 def _parse_file_location(self) -> t.Optional[exp.Expression]: 758 # Parse either a subquery or a staged file 759 return ( 760 self._parse_select(table=True, parse_subquery_alias=False) 761 if self._match(TokenType.L_PAREN, advance=False) 762 else self._parse_table_parts() 763 ) 764 765 def _parse_location_path(self) -> exp.Var: 766 parts = [self._advance_any(ignore_reserved=True)] 767 768 # We avoid consuming a comma token because external tables like @foo and @bar 769 # can be joined in a query with a comma separator, as well as closing paren 770 # in case of subqueries 771 while self._is_connected() and not self._match_set( 772 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 773 ): 774 parts.append(self._advance_any(ignore_reserved=True)) 775 776 return exp.var("".join(part.text for part in parts if part)) 777 778 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 779 this = super()._parse_lambda_arg() 780 781 if not this: 782 return this 783 784 typ = self._parse_types() 785 786 if typ: 787 return self.expression(exp.Cast, this=this, to=typ) 788 789 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
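As a usage sketch, the parser is typically reached through sqlglot's top-level helpers, which forward these arguments:

import sqlglot
from sqlglot import exp
from sqlglot.errors import ErrorLevel

# IFF is registered in FUNCTIONS above, so it parses into an exp.If node.
tree = sqlglot.parse_one(
    "SELECT IFF(x > 0, 'pos', 'non-pos') FROM t",
    read="snowflake",
    error_level=ErrorLevel.RAISE,
)
print(tree.find(exp.If) is not None)  # True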
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
791 class Tokenizer(tokens.Tokenizer): 792 STRING_ESCAPES = ["\\", "'"] 793 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 794 RAW_STRINGS = ["$$"] 795 COMMENTS = ["--", "//", ("/*", "*/")] 796 NESTED_COMMENTS = False 797 798 KEYWORDS = { 799 **tokens.Tokenizer.KEYWORDS, 800 "BYTEINT": TokenType.INT, 801 "CHAR VARYING": TokenType.VARCHAR, 802 "CHARACTER VARYING": TokenType.VARCHAR, 803 "EXCLUDE": TokenType.EXCEPT, 804 "ILIKE ANY": TokenType.ILIKE_ANY, 805 "LIKE ANY": TokenType.LIKE_ANY, 806 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 807 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 808 "MINUS": TokenType.EXCEPT, 809 "NCHAR VARYING": TokenType.VARCHAR, 810 "PUT": TokenType.COMMAND, 811 "REMOVE": TokenType.COMMAND, 812 "RM": TokenType.COMMAND, 813 "SAMPLE": TokenType.TABLE_SAMPLE, 814 "SQL_DOUBLE": TokenType.DOUBLE, 815 "SQL_VARCHAR": TokenType.VARCHAR, 816 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 817 "TAG": TokenType.TAG, 818 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 819 "TOP": TokenType.TOP, 820 "WAREHOUSE": TokenType.WAREHOUSE, 821 "STREAMLIT": TokenType.STREAMLIT, 822 } 823 KEYWORDS.pop("/*+") 824 825 SINGLE_TOKENS = { 826 **tokens.Tokenizer.SINGLE_TOKENS, 827 "$": TokenType.PARAMETER, 828 } 829 830 VAR_SINGLE_TOKENS = {"$"} 831 832 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
834 class Generator(generator.Generator): 835 PARAMETER_TOKEN = "$" 836 MATCHED_BY_SOURCE = False 837 SINGLE_STRING_INTERVAL = True 838 JOIN_HINTS = False 839 TABLE_HINTS = False 840 QUERY_HINTS = False 841 AGGREGATE_FILTER_SUPPORTED = False 842 SUPPORTS_TABLE_COPY = False 843 COLLATE_IS_FUNC = True 844 LIMIT_ONLY_LITERALS = True 845 JSON_KEY_VALUE_PAIR_SEP = "," 846 INSERT_OVERWRITE = " OVERWRITE INTO" 847 STRUCT_DELIMITER = ("(", ")") 848 COPY_PARAMS_ARE_WRAPPED = False 849 COPY_PARAMS_EQ_REQUIRED = True 850 STAR_EXCEPT = "EXCLUDE" 851 SUPPORTS_EXPLODING_PROJECTIONS = False 852 ARRAY_CONCAT_IS_VAR_LEN = False 853 SUPPORTS_CONVERT_TIMEZONE = True 854 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 855 SUPPORTS_MEDIAN = True 856 ARRAY_SIZE_NAME = "ARRAY_SIZE" 857 858 TRANSFORMS = { 859 **generator.Generator.TRANSFORMS, 860 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 861 exp.ArgMax: rename_func("MAX_BY"), 862 exp.ArgMin: rename_func("MIN_BY"), 863 exp.Array: inline_array_sql, 864 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 865 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 866 exp.AtTimeZone: lambda self, e: self.func( 867 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 868 ), 869 exp.BitwiseXor: rename_func("BITXOR"), 870 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 871 exp.DateAdd: date_delta_sql("DATEADD"), 872 exp.DateDiff: date_delta_sql("DATEDIFF"), 873 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 874 exp.DatetimeDiff: timestampdiff_sql, 875 exp.DateStrToDate: datestrtodate_sql, 876 exp.DayOfMonth: rename_func("DAYOFMONTH"), 877 exp.DayOfWeek: rename_func("DAYOFWEEK"), 878 exp.DayOfYear: rename_func("DAYOFYEAR"), 879 exp.Explode: rename_func("FLATTEN"), 880 exp.Extract: rename_func("DATE_PART"), 881 exp.FromTimeZone: lambda self, e: self.func( 882 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 883 ), 884 exp.GenerateSeries: lambda self, e: self.func( 885 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 886 ), 887 exp.GroupConcat: rename_func("LISTAGG"), 888 exp.If: if_sql(name="IFF", false_value="NULL"), 889 exp.JSONExtractArray: _json_extract_value_array_sql, 890 exp.JSONExtractScalar: lambda self, e: self.func( 891 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 892 ), 893 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 894 exp.JSONPathRoot: lambda *_: "", 895 exp.JSONValueArray: _json_extract_value_array_sql, 896 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 897 exp.LogicalOr: rename_func("BOOLOR_AGG"), 898 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 899 exp.MakeInterval: no_make_interval_sql, 900 exp.Max: max_or_greatest, 901 exp.Min: min_or_least, 902 exp.ParseJSON: lambda self, e: self.func( 903 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 904 ), 905 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 906 exp.PercentileCont: transforms.preprocess( 907 [transforms.add_within_group_for_percentiles] 908 ), 909 exp.PercentileDisc: transforms.preprocess( 910 [transforms.add_within_group_for_percentiles] 911 ), 912 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 913 exp.RegexpExtract: _regexpextract_sql, 914 exp.RegexpExtractAll: _regexpextract_sql, 915 exp.RegexpILike: _regexpilike_sql, 916 exp.Rand: rename_func("RANDOM"), 917 exp.Select: transforms.preprocess( 918 [ 919 transforms.eliminate_distinct_on, 
920 transforms.explode_to_unnest(), 921 transforms.eliminate_semi_and_anti_joins, 922 _transform_generate_date_array, 923 ] 924 ), 925 exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"), 926 exp.SHA: rename_func("SHA1"), 927 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 928 exp.StartsWith: rename_func("STARTSWITH"), 929 exp.StrPosition: lambda self, e: self.func( 930 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 931 ), 932 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 933 exp.Stuff: rename_func("INSERT"), 934 exp.TimeAdd: date_delta_sql("TIMEADD"), 935 exp.Timestamp: no_timestamp_sql, 936 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 937 exp.TimestampDiff: lambda self, e: self.func( 938 "TIMESTAMPDIFF", e.unit, e.expression, e.this 939 ), 940 exp.TimestampTrunc: timestamptrunc_sql(), 941 exp.TimeStrToTime: timestrtotime_sql, 942 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 943 exp.ToArray: rename_func("TO_ARRAY"), 944 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 945 exp.ToDouble: rename_func("TO_DOUBLE"), 946 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 947 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 948 exp.TsOrDsToDate: lambda self, e: self.func( 949 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 950 ), 951 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 952 exp.Uuid: rename_func("UUID_STRING"), 953 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 954 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 955 exp.Xor: rename_func("BOOLXOR"), 956 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 957 rename_func("EDITDISTANCE") 958 ), 959 } 960 961 SUPPORTED_JSON_PATH_PARTS = { 962 exp.JSONPathKey, 963 exp.JSONPathRoot, 964 exp.JSONPathSubscript, 965 } 966 967 TYPE_MAPPING = { 968 **generator.Generator.TYPE_MAPPING, 969 exp.DataType.Type.NESTED: "OBJECT", 970 exp.DataType.Type.STRUCT: "OBJECT", 971 } 972 973 PROPERTIES_LOCATION = { 974 **generator.Generator.PROPERTIES_LOCATION, 975 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 976 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 977 } 978 979 UNSUPPORTED_VALUES_EXPRESSIONS = { 980 exp.Map, 981 exp.StarMap, 982 exp.Struct, 983 exp.VarMap, 984 } 985 986 def with_properties(self, properties: exp.Properties) -> str: 987 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 988 989 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 990 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 991 values_as_table = False 992 993 return super().values_sql(expression, values_as_table=values_as_table) 994 995 def datatype_sql(self, expression: exp.DataType) -> str: 996 expressions = expression.expressions 997 if ( 998 expressions 999 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1000 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1001 ): 1002 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1003 return "OBJECT" 1004 1005 return super().datatype_sql(expression) 1006 1007 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1008 return self.func( 1009 "TO_NUMBER", 1010 expression.this, 1011 expression.args.get("format"), 1012 expression.args.get("precision"), 1013 expression.args.get("scale"), 1014 ) 1015 1016 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1017 milli = 
expression.args.get("milli") 1018 if milli is not None: 1019 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1020 expression.set("nano", milli_to_nano) 1021 1022 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1023 1024 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1025 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1026 return self.func("TO_GEOGRAPHY", expression.this) 1027 if expression.is_type(exp.DataType.Type.GEOMETRY): 1028 return self.func("TO_GEOMETRY", expression.this) 1029 1030 return super().cast_sql(expression, safe_prefix=safe_prefix) 1031 1032 def trycast_sql(self, expression: exp.TryCast) -> str: 1033 value = expression.this 1034 1035 if value.type is None: 1036 from sqlglot.optimizer.annotate_types import annotate_types 1037 1038 value = annotate_types(value) 1039 1040 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1041 return super().trycast_sql(expression) 1042 1043 # TRY_CAST only works for string values in Snowflake 1044 return self.cast_sql(expression) 1045 1046 def log_sql(self, expression: exp.Log) -> str: 1047 if not expression.expression: 1048 return self.func("LN", expression.this) 1049 1050 return super().log_sql(expression) 1051 1052 def unnest_sql(self, expression: exp.Unnest) -> str: 1053 unnest_alias = expression.args.get("alias") 1054 offset = expression.args.get("offset") 1055 1056 columns = [ 1057 exp.to_identifier("seq"), 1058 exp.to_identifier("key"), 1059 exp.to_identifier("path"), 1060 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1061 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1062 or exp.to_identifier("value"), 1063 exp.to_identifier("this"), 1064 ] 1065 1066 if unnest_alias: 1067 unnest_alias.set("columns", columns) 1068 else: 1069 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1070 1071 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1072 alias = self.sql(unnest_alias) 1073 alias = f" AS {alias}" if alias else "" 1074 return f"{explode}{alias}" 1075 1076 def show_sql(self, expression: exp.Show) -> str: 1077 terse = "TERSE " if expression.args.get("terse") else "" 1078 history = " HISTORY" if expression.args.get("history") else "" 1079 like = self.sql(expression, "like") 1080 like = f" LIKE {like}" if like else "" 1081 1082 scope = self.sql(expression, "scope") 1083 scope = f" {scope}" if scope else "" 1084 1085 scope_kind = self.sql(expression, "scope_kind") 1086 if scope_kind: 1087 scope_kind = f" IN {scope_kind}" 1088 1089 starts_with = self.sql(expression, "starts_with") 1090 if starts_with: 1091 starts_with = f" STARTS WITH {starts_with}" 1092 1093 limit = self.sql(expression, "limit") 1094 1095 from_ = self.sql(expression, "from") 1096 if from_: 1097 from_ = f" FROM {from_}" 1098 1099 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1100 1101 def describe_sql(self, expression: exp.Describe) -> str: 1102 # Default to table if kind is unknown 1103 kind_value = expression.args.get("kind") or "TABLE" 1104 kind = f" {kind_value}" if kind_value else "" 1105 this = f" {self.sql(expression, 'this')}" 1106 expressions = self.expressions(expression, flat=True) 1107 expressions = f" {expressions}" if expressions else "" 1108 return f"DESCRIBE{kind}{this}{expressions}" 1109 1110 def generatedasidentitycolumnconstraint_sql( 1111 self, expression: exp.GeneratedAsIdentityColumnConstraint 1112 ) -> str: 1113 start = 
expression.args.get("start") 1114 start = f" START {start}" if start else "" 1115 increment = expression.args.get("increment") 1116 increment = f" INCREMENT {increment}" if increment else "" 1117 return f"AUTOINCREMENT{start}{increment}" 1118 1119 def cluster_sql(self, expression: exp.Cluster) -> str: 1120 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1121 1122 def struct_sql(self, expression: exp.Struct) -> str: 1123 keys = [] 1124 values = [] 1125 1126 for i, e in enumerate(expression.expressions): 1127 if isinstance(e, exp.PropertyEQ): 1128 keys.append( 1129 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1130 ) 1131 values.append(e.expression) 1132 else: 1133 keys.append(exp.Literal.string(f"_{i}")) 1134 values.append(e) 1135 1136 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1137 1138 @unsupported_args("weight", "accuracy") 1139 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1140 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1141 1142 def alterset_sql(self, expression: exp.AlterSet) -> str: 1143 exprs = self.expressions(expression, flat=True) 1144 exprs = f" {exprs}" if exprs else "" 1145 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1146 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1147 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1148 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1149 tag = self.expressions(expression, key="tag", flat=True) 1150 tag = f" TAG {tag}" if tag else "" 1151 1152 return f"SET{exprs}{file_format}{copy_options}{tag}" 1153 1154 def strtotime_sql(self, expression: exp.StrToTime): 1155 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1156 return self.func( 1157 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1158 ) 1159 1160 def timestampsub_sql(self, expression: exp.TimestampSub): 1161 return self.sql( 1162 exp.TimestampAdd( 1163 this=expression.this, 1164 expression=expression.expression * -1, 1165 unit=expression.unit, 1166 ) 1167 ) 1168 1169 def jsonextract_sql(self, expression: exp.JSONExtract): 1170 this = expression.this 1171 1172 # JSON strings are valid coming from other dialects such as BQ 1173 return self.func( 1174 "GET_PATH", 1175 exp.ParseJSON(this=this) if this.is_string else this, 1176 expression.expression, 1177 ) 1178 1179 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1180 this = expression.this 1181 if not isinstance(this, exp.TsOrDsToTimestamp): 1182 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1183 1184 return self.func("TO_CHAR", this, self.format_time(expression))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
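In practice these options are usually passed through Expression.sql rather than by instantiating the generator directly, e.g. (a minimal sketch):

import sqlglot

tree = sqlglot.parse_one("SELECT a, b FROM t WHERE a > 1", read="snowflake")
# pretty, identify, etc. are forwarded to this Generator.
print(tree.sql(dialect="snowflake", pretty=True))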
def datatype_sql(self, expression: exp.DataType) -> str:
    expressions = expression.expressions
    if (
        expressions
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field_type, exp.DataType) for field_type in expressions)
    ):
        # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
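A sketch of the effect (reading a typed STRUCT column from BigQuery is an assumption for the example; the type is expected to collapse to a bare OBJECT):

import sqlglot

# Typed struct fields can't be expressed here, so the column type becomes OBJECT.
print(sqlglot.transpile("CREATE TABLE t (col STRUCT<a INT64>)", read="bigquery", write="snowflake")[0])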
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
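For example (a hedged sketch; the output shape is inferred from the code above):

import sqlglot

# Casts to geospatial types are rendered as conversion functions instead.
print(sqlglot.transpile("SELECT CAST(x AS GEOGRAPHY)", read="snowflake", write="snowflake")[0])
# expected: SELECT TO_GEOGRAPHY(x)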
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
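A sketch of the two paths (expected outputs are assumptions based on the code above):

import sqlglot

# An untyped column annotates to UNKNOWN, so the TRY_CAST is preserved.
print(sqlglot.transpile("SELECT TRY_CAST(x AS DOUBLE)", read="snowflake", write="snowflake")[0])

# A non-string literal falls back to a plain CAST.
print(sqlglot.transpile("SELECT TRY_CAST(10 AS VARCHAR)", read="snowflake", write="snowflake")[0])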
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
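For instance, an UNNEST read from BigQuery should come out as a FLATTEN table function with the default column list built above (a hedged sketch):

import sqlglot

# expected shape: SELECT value FROM TABLE(FLATTEN(INPUT => x)) AS _u(seq, key, path, index, value, this)
print(sqlglot.transpile("SELECT value FROM UNNEST(x)", read="bigquery", write="snowflake")[0])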
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
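SHOW statements built by the parser above round-trip through this method, e.g. (a sketch; the identity round trip is the expected behavior):

import sqlglot

sql = "SHOW TERSE TABLES LIKE 'foo%' IN SCHEMA db1.schema1"
# expected to round-trip unchanged
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])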
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
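Standard GENERATED ... AS IDENTITY constraints map onto Snowflake's AUTOINCREMENT shorthand. A sketch; the output is an assumption:

    import sqlglot

    # START WITH / INCREMENT BY become the START / INCREMENT suffixes.
    print(
        sqlglot.transpile(
            "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))",
            read="postgres",
            write="snowflake",
        )
    )
    # Expected (roughly): ['CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)']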
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
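Named struct fields become key/value pairs and positional fields get synthetic '_<i>' keys. A sketch; the expected output is an assumption:

    import sqlglot

    # '1 AS a' is a named field; the bare '2' falls back to the key '_1'.
    print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2)", read="bigquery", write="snowflake"))
    # Expected (roughly): ["SELECT OBJECT_CONSTRUCT('a', 1, '_1', 2)"]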
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
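A round-trip sketch for the stage options; whether the parser fills the copy_options arg exactly this way is an assumption:

    import sqlglot

    print(
        sqlglot.transpile(
            "ALTER TABLE t SET STAGE_COPY_OPTIONS = (ON_ERROR = SKIP_FILE)",
            read="snowflake",
            write="snowflake",
        )
    )
    # Expected: ['ALTER TABLE t SET STAGE_COPY_OPTIONS = (ON_ERROR = SKIP_FILE)']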
Inherited Members
- sqlglot.generator.Generator