sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 NormalizationStrategy, 11 annotate_with_type_lambda, 12 arg_max_or_min_no_count, 13 binary_from_function, 14 date_add_interval_sql, 15 datestrtodate_sql, 16 build_formatted_time, 17 filter_array_using_unnest, 18 if_sql, 19 inline_array_unless_query, 20 max_or_greatest, 21 min_or_least, 22 no_ilike_sql, 23 build_date_delta_with_interval, 24 regexp_replace_sql, 25 rename_func, 26 sha256_sql, 27 timestrtotime_sql, 28 ts_or_ds_add_cast, 29 unit_to_var, 30 strposition_sql, 31 groupconcat_sql, 32) 33from sqlglot.helper import seq_get, split_num_words 34from sqlglot.tokens import TokenType 35from sqlglot.generator import unsupported_args 36 37if t.TYPE_CHECKING: 38 from sqlglot._typing import E, Lit 39 40 from sqlglot.optimizer.annotate_types import TypeAnnotator 41 42logger = logging.getLogger("sqlglot") 43 44 45JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 46 47DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 48 49 50def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 51 if not expression.find_ancestor(exp.From, exp.Join): 52 return self.values_sql(expression) 53 54 structs = [] 55 alias = expression.args.get("alias") 56 for tup in expression.find_all(exp.Tuple): 57 field_aliases = ( 58 alias.columns 59 if alias and alias.columns 60 else (f"_c{i}" for i in range(len(tup.expressions))) 61 ) 62 expressions = [ 63 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 64 for name, fld in zip(field_aliases, tup.expressions) 65 ] 66 structs.append(exp.Struct(expressions=expressions)) 67 68 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 69 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 70 return self.unnest_sql( 71 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 72 ) 73 74 75def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 76 this = expression.this 77 if isinstance(this, exp.Schema): 78 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 79 else: 80 this = self.sql(this) 81 return f"RETURNS {this}" 82 83 84def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 85 returns = expression.find(exp.ReturnsProperty) 86 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 87 expression.set("kind", "TABLE FUNCTION") 88 89 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 90 expression.set("expression", expression.expression.this) 91 92 return self.create_sql(expression) 93 94 95# https://issuetracker.google.com/issues/162294746 96# workaround for bigquery bug when grouping by an expression and then ordering 97# WITH x AS (SELECT 1 y) 98# SELECT y + 1 z 99# FROM x 100# GROUP BY x + 1 101# ORDER by z 102def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 103 if isinstance(expression, exp.Select): 104 group = expression.args.get("group") 105 order = expression.args.get("order") 106 107 if group and order: 108 aliases = { 109 select.this: select.args["alias"] 110 for select in expression.selects 111 if isinstance(select, exp.Alias) 112 } 113 114 for grouped in group.expressions: 115 if grouped.is_int: 116 continue 117 alias = aliases.get(grouped) 118 if alias: 119 grouped.replace(exp.column(alias)) 120 121 return expression 122 123 124def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 125 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 126 if isinstance(expression, exp.CTE) and expression.alias_column_names: 127 cte_query = expression.this 128 129 if cte_query.is_star: 130 logger.warning( 131 "Can't push down CTE column names for star queries. Run the query through" 132 " the optimizer or use 'qualify' to expand the star projections first." 133 ) 134 return expression 135 136 column_names = expression.alias_column_names 137 expression.args["alias"].set("columns", None) 138 139 for name, select in zip(column_names, cte_query.selects): 140 to_replace = select 141 142 if isinstance(select, exp.Alias): 143 select = select.this 144 145 # Inner aliases are shadowed by the CTE column names 146 to_replace.replace(exp.alias_(select, name)) 147 148 return expression 149 150 151def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 152 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 153 this.set("zone", seq_get(args, 2)) 154 return this 155 156 157def _build_timestamp(args: t.List) -> exp.Timestamp: 158 timestamp = exp.Timestamp.from_arg_list(args) 159 timestamp.set("with_tz", True) 160 return timestamp 161 162 163def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 164 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 165 return expr_type.from_arg_list(args) 166 167 168def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 169 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 170 arg = seq_get(args, 0) 171 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 172 173 174def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 175 return self.sql( 176 exp.Exists( 177 this=exp.select("1") 178 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 179 .where(exp.column("_col").eq(expression.right)) 180 ) 181 ) 182 183 184def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 185 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 186 187 188def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 189 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 190 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 191 unit = unit_to_var(expression) 192 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 193 194 195def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 196 scale = expression.args.get("scale") 197 timestamp = expression.this 198 199 if scale in (None, exp.UnixToTime.SECONDS): 200 return self.func("TIMESTAMP_SECONDS", timestamp) 201 if scale == exp.UnixToTime.MILLIS: 202 return self.func("TIMESTAMP_MILLIS", timestamp) 203 if scale == exp.UnixToTime.MICROS: 204 return self.func("TIMESTAMP_MICROS", timestamp) 205 206 unix_seconds = exp.cast( 207 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 208 ) 209 return self.func("TIMESTAMP_SECONDS", unix_seconds) 210 211 212def _build_time(args: t.List) -> exp.Func: 213 if len(args) == 1: 214 return exp.TsOrDsToTime(this=args[0]) 215 if len(args) == 2: 216 return exp.Time.from_arg_list(args) 217 return exp.TimeFromParts.from_arg_list(args) 218 219 220def _build_datetime(args: t.List) -> exp.Func: 221 if len(args) == 1: 222 return exp.TsOrDsToDatetime.from_arg_list(args) 223 if len(args) == 2: 224 return exp.Datetime.from_arg_list(args) 225 return exp.TimestampFromParts.from_arg_list(args) 226 227 228def _build_regexp_extract( 229 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 230) -> t.Callable[[t.List], E]: 231 def _builder(args: t.List) -> E: 232 try: 233 group = re.compile(args[1].name).groups == 1 234 except re.error: 235 group = False 236 237 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 238 return expr_type( 239 this=seq_get(args, 0), 240 expression=seq_get(args, 1), 241 position=seq_get(args, 2), 242 occurrence=seq_get(args, 3), 243 group=exp.Literal.number(1) if group else default_group, 244 ) 245 246 return _builder 247 248 249def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 250 def _builder(args: t.List, dialect: Dialect) -> E: 251 if len(args) == 1: 252 # The default value for the JSONPath is '$' i.e all of the data 253 args.append(exp.Literal.string("$")) 254 return parser.build_extract_json_with_path(expr_type)(args, dialect) 255 256 return _builder 257 258 259def _str_to_datetime_sql( 260 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 261) -> str: 262 this = self.sql(expression, "this") 263 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 264 265 if expression.args.get("safe"): 266 fmt = self.format_time( 267 expression, 268 self.dialect.INVERSE_FORMAT_MAPPING, 269 self.dialect.INVERSE_FORMAT_TRIE, 270 ) 271 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 272 273 fmt = self.format_time(expression) 274 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 275 276 277def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 278 """ 279 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 280 +---------+---------+---------+------------+---------+ 281 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 282 +---------+---------+---------+------------+---------+ 283 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 284 +---------+---------+---------+------------+---------+ 285 """ 286 self._annotate_args(expression) 287 288 this: exp.Expression = expression.this 289 290 self._set_type( 291 expression, 292 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 293 ) 294 return expression 295 296 297@unsupported_args("ins_cost", "del_cost", "sub_cost") 298def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 299 max_dist = expression.args.get("max_dist") 300 if max_dist: 301 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 302 303 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 304 305 306def _build_levenshtein(args: t.List) -> exp.Levenshtein: 307 max_dist = seq_get(args, 2) 308 return exp.Levenshtein( 309 this=seq_get(args, 0), 310 expression=seq_get(args, 1), 311 max_dist=max_dist.expression if max_dist else None, 312 ) 313 314 315def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 316 def _builder(args: t.List) -> exp.TimeToStr: 317 return exp.TimeToStr( 318 this=expr_type(this=seq_get(args, 1)), 319 format=seq_get(args, 0), 320 zone=seq_get(args, 2), 321 ) 322 323 return _builder 324 325 326def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous: 327 if len(args) == 3: 328 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 329 330 # Lowercase the operands in case of transpilation, as exp.Contains 331 # is case-sensitive on other dialects 332 this = exp.Lower(this=seq_get(args, 0)) 333 expr = exp.Lower(this=seq_get(args, 1)) 334 335 return exp.Contains(this=this, expression=expr) 336 337 338def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 339 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 340 upper = name.upper() 341 342 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 343 344 if dquote_escaping: 345 self._quote_json_path_key_using_brackets = False 346 347 sql = rename_func(upper)(self, expression) 348 349 if dquote_escaping: 350 self._quote_json_path_key_using_brackets = True 351 352 return sql 353 354 355class BigQuery(Dialect): 356 WEEK_OFFSET = -1 357 UNNEST_COLUMN_ONLY = True 358 SUPPORTS_USER_DEFINED_TYPES = False 359 SUPPORTS_SEMI_ANTI_JOIN = False 360 LOG_BASE_FIRST = False 361 HEX_LOWERCASE = True 362 FORCE_EARLY_ALIAS_REF_EXPANSION = True 363 PRESERVE_ORIGINAL_NAMES = True 364 HEX_STRING_IS_INTEGER_TYPE = True 365 366 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 367 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 368 369 # bigquery udfs are case sensitive 370 NORMALIZE_FUNCTIONS = False 371 372 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 373 TIME_MAPPING = { 374 "%D": "%m/%d/%y", 375 "%E6S": "%S.%f", 376 "%e": "%-d", 377 } 378 379 FORMAT_MAPPING = { 380 "DD": "%d", 381 "MM": "%m", 382 "MON": "%b", 383 "MONTH": "%B", 384 "YYYY": "%Y", 385 "YY": "%y", 386 "HH": "%I", 387 "HH12": "%I", 388 "HH24": "%H", 389 "MI": "%M", 390 "SS": "%S", 391 "SSSSS": "%f", 392 "TZH": "%z", 393 } 394 395 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 396 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 397 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 398 399 # All set operations require either a DISTINCT or ALL specifier 400 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 401 402 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 403 TYPE_TO_EXPRESSIONS = { 404 **Dialect.TYPE_TO_EXPRESSIONS, 405 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 406 } 407 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 408 409 ANNOTATORS = { 410 **Dialect.ANNOTATORS, 411 **{ 412 expr_type: annotate_with_type_lambda(data_type) 413 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 414 for expr_type in expressions 415 }, 416 **{ 417 expr_type: lambda self, e: _annotate_math_functions(self, e) 418 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 419 }, 420 **{ 421 expr_type: lambda self, e: self._annotate_by_args(e, "this") 422 for expr_type in ( 423 exp.Left, 424 exp.Right, 425 exp.Lower, 426 exp.Upper, 427 exp.Pad, 428 exp.Trim, 429 exp.RegexpExtract, 430 exp.RegexpReplace, 431 exp.Repeat, 432 exp.Substring, 433 ) 434 }, 435 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 436 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 437 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 438 } 439 440 def normalize_identifier(self, expression: E) -> E: 441 if ( 442 isinstance(expression, exp.Identifier) 443 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 444 ): 445 parent = expression.parent 446 while isinstance(parent, exp.Dot): 447 parent = parent.parent 448 449 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 450 # by default. The following check uses a heuristic to detect tables based on whether 451 # they are qualified. This should generally be correct, because tables in BigQuery 452 # must be qualified with at least a dataset, unless @@dataset_id is set. 453 case_sensitive = ( 454 isinstance(parent, exp.UserDefinedFunction) 455 or ( 456 isinstance(parent, exp.Table) 457 and parent.db 458 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 459 ) 460 or expression.meta.get("is_table") 461 ) 462 if not case_sensitive: 463 expression.set("this", expression.this.lower()) 464 465 return expression 466 467 class Tokenizer(tokens.Tokenizer): 468 QUOTES = ["'", '"', '"""', "'''"] 469 COMMENTS = ["--", "#", ("/*", "*/")] 470 IDENTIFIERS = ["`"] 471 STRING_ESCAPES = ["\\"] 472 473 HEX_STRINGS = [("0x", ""), ("0X", "")] 474 475 BYTE_STRINGS = [ 476 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 477 ] 478 479 RAW_STRINGS = [ 480 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 481 ] 482 483 KEYWORDS = { 484 **tokens.Tokenizer.KEYWORDS, 485 "ANY TYPE": TokenType.VARIANT, 486 "BEGIN": TokenType.COMMAND, 487 "BEGIN TRANSACTION": TokenType.BEGIN, 488 "BYTEINT": TokenType.INT, 489 "BYTES": TokenType.BINARY, 490 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 491 "DATETIME": TokenType.TIMESTAMP, 492 "DECLARE": TokenType.COMMAND, 493 "ELSEIF": TokenType.COMMAND, 494 "EXCEPTION": TokenType.COMMAND, 495 "EXPORT": TokenType.EXPORT, 496 "FLOAT64": TokenType.DOUBLE, 497 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 498 "MODEL": TokenType.MODEL, 499 "NOT DETERMINISTIC": TokenType.VOLATILE, 500 "RECORD": TokenType.STRUCT, 501 "TIMESTAMP": TokenType.TIMESTAMPTZ, 502 } 503 KEYWORDS.pop("DIV") 504 KEYWORDS.pop("VALUES") 505 KEYWORDS.pop("/*+") 506 507 class Parser(parser.Parser): 508 PREFIXED_PIVOT_COLUMNS = True 509 LOG_DEFAULTS_TO_LN = True 510 SUPPORTS_IMPLICIT_UNNEST = True 511 512 FUNCTIONS = { 513 **parser.Parser.FUNCTIONS, 514 "CONTAINS_SUBSTR": _build_contains_substring, 515 "DATE": _build_date, 516 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 517 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 518 "DATE_TRUNC": lambda args: exp.DateTrunc( 519 unit=exp.Literal.string(str(seq_get(args, 1))), 520 this=seq_get(args, 0), 521 zone=seq_get(args, 2), 522 ), 523 "DATETIME": _build_datetime, 524 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 525 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 526 "DIV": binary_from_function(exp.IntDiv), 527 "EDIT_DISTANCE": _build_levenshtein, 528 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 529 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 530 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 531 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 532 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 533 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 534 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 535 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 536 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 537 "MD5": exp.MD5Digest.from_arg_list, 538 "TO_HEX": _build_to_hex, 539 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 540 [seq_get(args, 1), seq_get(args, 0)] 541 ), 542 "PARSE_TIMESTAMP": _build_parse_timestamp, 543 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 544 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 545 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 546 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 547 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 548 ), 549 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 550 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 551 "SPLIT": lambda args: exp.Split( 552 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 553 this=seq_get(args, 0), 554 expression=seq_get(args, 1) or exp.Literal.string(","), 555 ), 556 "STRPOS": exp.StrPosition.from_arg_list, 557 "TIME": _build_time, 558 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 559 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 560 "TIMESTAMP": _build_timestamp, 561 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 562 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 563 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 564 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 565 ), 566 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 567 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 568 ), 569 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 570 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 571 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 572 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 573 } 574 575 FUNCTION_PARSERS = { 576 **parser.Parser.FUNCTION_PARSERS, 577 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 578 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 579 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 580 } 581 FUNCTION_PARSERS.pop("TRIM") 582 583 NO_PAREN_FUNCTIONS = { 584 **parser.Parser.NO_PAREN_FUNCTIONS, 585 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 586 } 587 588 NESTED_TYPE_TOKENS = { 589 *parser.Parser.NESTED_TYPE_TOKENS, 590 TokenType.TABLE, 591 } 592 593 PROPERTY_PARSERS = { 594 **parser.Parser.PROPERTY_PARSERS, 595 "NOT DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 597 ), 598 "OPTIONS": lambda self: self._parse_with_property(), 599 } 600 601 CONSTRAINT_PARSERS = { 602 **parser.Parser.CONSTRAINT_PARSERS, 603 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 604 } 605 606 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 607 RANGE_PARSERS.pop(TokenType.OVERLAPS) 608 609 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 610 611 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 612 613 STATEMENT_PARSERS = { 614 **parser.Parser.STATEMENT_PARSERS, 615 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 616 TokenType.END: lambda self: self._parse_as_command(self._prev), 617 TokenType.FOR: lambda self: self._parse_for_in(), 618 TokenType.EXPORT: lambda self: self._parse_export_data(), 619 } 620 621 BRACKET_OFFSETS = { 622 "OFFSET": (0, False), 623 "ORDINAL": (1, False), 624 "SAFE_OFFSET": (0, True), 625 "SAFE_ORDINAL": (1, True), 626 } 627 628 def _parse_for_in(self) -> exp.ForIn: 629 this = self._parse_range() 630 self._match_text_seq("DO") 631 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 632 633 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 634 this = super()._parse_table_part(schema=schema) or self._parse_number() 635 636 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 637 if isinstance(this, exp.Identifier): 638 table_name = this.name 639 while self._match(TokenType.DASH, advance=False) and self._next: 640 start = self._curr 641 while self._is_connected() and not self._match_set( 642 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 643 ): 644 self._advance() 645 646 table_name += self._find_sql(start, self._prev) 647 648 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 649 elif isinstance(this, exp.Literal): 650 table_name = this.name 651 652 if self._is_connected() and self._parse_var(any_token=True): 653 table_name += self._prev.text 654 655 this = exp.Identifier(this=table_name, quoted=True) 656 657 return this 658 659 def _parse_table_parts( 660 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 661 ) -> exp.Table: 662 table = super()._parse_table_parts( 663 schema=schema, is_db_reference=is_db_reference, wildcard=True 664 ) 665 666 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 667 if not table.catalog: 668 if table.db: 669 parts = table.db.split(".") 670 if len(parts) == 2 and not table.args["db"].quoted: 671 table.set("catalog", exp.Identifier(this=parts[0])) 672 table.set("db", exp.Identifier(this=parts[1])) 673 else: 674 parts = table.name.split(".") 675 if len(parts) == 2 and not table.this.quoted: 676 table.set("db", exp.Identifier(this=parts[0])) 677 table.set("this", exp.Identifier(this=parts[1])) 678 679 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 680 alias = table.this 681 catalog, db, this, *rest = ( 682 exp.to_identifier(p, quoted=True) 683 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 684 ) 685 686 if rest and this: 687 this = exp.Dot.build([this, *rest]) # type: ignore 688 689 table = exp.Table( 690 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 691 ) 692 table.meta["quoted_table"] = True 693 else: 694 alias = None 695 696 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 697 # dataset, so if the project identifier is omitted we need to fix the ast so that 698 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 699 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 700 # views, because it would seem like the "catalog" part is set, when it'd actually 701 # be the region/dataset. Merging the two identifiers into a single one is done to 702 # avoid producing a 4-part Table reference, which would cause issues in the schema 703 # module, when there are 3-part table names mixed with information schema views. 704 # 705 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 706 table_parts = table.parts 707 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 708 # We need to alias the table here to avoid breaking existing qualified columns. 709 # This is expected to be safe, because if there's an actual alias coming up in 710 # the token stream, it will overwrite this one. If there isn't one, we are only 711 # exposing the name that can be used to reference the view explicitly (a no-op). 712 exp.alias_( 713 table, 714 t.cast(exp.Identifier, alias or table_parts[-1]), 715 table=True, 716 copy=False, 717 ) 718 719 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 720 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 721 table.set("db", seq_get(table_parts, -3)) 722 table.set("catalog", seq_get(table_parts, -4)) 723 724 return table 725 726 def _parse_column(self) -> t.Optional[exp.Expression]: 727 column = super()._parse_column() 728 if isinstance(column, exp.Column): 729 parts = column.parts 730 if any("." in p.name for p in parts): 731 catalog, db, table, this, *rest = ( 732 exp.to_identifier(p, quoted=True) 733 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 734 ) 735 736 if rest and this: 737 this = exp.Dot.build([this, *rest]) # type: ignore 738 739 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 740 column.meta["quoted_column"] = True 741 742 return column 743 744 @t.overload 745 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 746 747 @t.overload 748 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 749 750 def _parse_json_object(self, agg=False): 751 json_object = super()._parse_json_object() 752 array_kv_pair = seq_get(json_object.expressions, 0) 753 754 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 755 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 756 if ( 757 array_kv_pair 758 and isinstance(array_kv_pair.this, exp.Array) 759 and isinstance(array_kv_pair.expression, exp.Array) 760 ): 761 keys = array_kv_pair.this.expressions 762 values = array_kv_pair.expression.expressions 763 764 json_object.set( 765 "expressions", 766 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 767 ) 768 769 return json_object 770 771 def _parse_bracket( 772 self, this: t.Optional[exp.Expression] = None 773 ) -> t.Optional[exp.Expression]: 774 bracket = super()._parse_bracket(this) 775 776 if this is bracket: 777 return bracket 778 779 if isinstance(bracket, exp.Bracket): 780 for expression in bracket.expressions: 781 name = expression.name.upper() 782 783 if name not in self.BRACKET_OFFSETS: 784 break 785 786 offset, safe = self.BRACKET_OFFSETS[name] 787 bracket.set("offset", offset) 788 bracket.set("safe", safe) 789 expression.replace(expression.expressions[0]) 790 791 return bracket 792 793 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 794 unnest = super()._parse_unnest(with_alias=with_alias) 795 796 if not unnest: 797 return None 798 799 unnest_expr = seq_get(unnest.expressions, 0) 800 if unnest_expr: 801 from sqlglot.optimizer.annotate_types import annotate_types 802 803 unnest_expr = annotate_types(unnest_expr) 804 805 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 806 # in contrast to other dialects such as DuckDB which flattens only the array by default 807 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 808 array_elem.is_type(exp.DataType.Type.STRUCT) 809 for array_elem in unnest_expr._type.expressions 810 ): 811 unnest.set("explode_array", True) 812 813 return unnest 814 815 def _parse_make_interval(self) -> exp.MakeInterval: 816 expr = exp.MakeInterval() 817 818 for arg_key in expr.arg_types: 819 value = self._parse_lambda() 820 821 if not value: 822 break 823 824 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 825 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 826 if isinstance(value, exp.Kwarg): 827 arg_key = value.this.name 828 829 expr.set(arg_key, value) 830 831 self._match(TokenType.COMMA) 832 833 return expr 834 835 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 836 expr = self.expression( 837 exp.FeaturesAtTime, 838 this=(self._match(TokenType.TABLE) and self._parse_table()) 839 or self._parse_select(nested=True), 840 ) 841 842 while self._match(TokenType.COMMA): 843 arg = self._parse_lambda() 844 845 # Get the LHS of the Kwarg and set the arg to that value, e.g 846 # "num_rows => 1" sets the expr's `num_rows` arg 847 if arg: 848 expr.set(arg.this.name, arg) 849 850 return expr 851 852 def _parse_export_data(self) -> exp.Export: 853 self._match_text_seq("DATA") 854 855 return self.expression( 856 exp.Export, 857 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 858 options=self._parse_properties(), 859 this=self._match_text_seq("AS") and self._parse_select(), 860 ) 861 862 class Generator(generator.Generator): 863 INTERVAL_ALLOWS_PLURAL_FORM = False 864 JOIN_HINTS = False 865 QUERY_HINTS = False 866 TABLE_HINTS = False 867 LIMIT_FETCH = "LIMIT" 868 RENAME_TABLE_WITH_DB = False 869 NVL2_SUPPORTED = False 870 UNNEST_WITH_ORDINALITY = False 871 COLLATE_IS_FUNC = True 872 LIMIT_ONLY_LITERALS = True 873 SUPPORTS_TABLE_ALIAS_COLUMNS = False 874 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 875 JSON_KEY_VALUE_PAIR_SEP = "," 876 NULL_ORDERING_SUPPORTED = False 877 IGNORE_NULLS_IN_FUNC = True 878 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 879 CAN_IMPLEMENT_ARRAY_ANY = True 880 SUPPORTS_TO_NUMBER = False 881 NAMED_PLACEHOLDER_TOKEN = "@" 882 HEX_FUNC = "TO_HEX" 883 WITH_PROPERTIES_PREFIX = "OPTIONS" 884 SUPPORTS_EXPLODING_PROJECTIONS = False 885 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 886 SUPPORTS_UNIX_SECONDS = True 887 888 TRANSFORMS = { 889 **generator.Generator.TRANSFORMS, 890 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 891 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 892 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 893 exp.Array: inline_array_unless_query, 894 exp.ArrayContains: _array_contains_sql, 895 exp.ArrayFilter: filter_array_using_unnest, 896 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 897 exp.CollateProperty: lambda self, e: ( 898 f"DEFAULT COLLATE {self.sql(e, 'this')}" 899 if e.args.get("default") 900 else f"COLLATE {self.sql(e, 'this')}" 901 ), 902 exp.Commit: lambda *_: "COMMIT TRANSACTION", 903 exp.CountIf: rename_func("COUNTIF"), 904 exp.Create: _create_sql, 905 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 906 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 907 exp.DateDiff: lambda self, e: self.func( 908 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 909 ), 910 exp.DateFromParts: rename_func("DATE"), 911 exp.DateStrToDate: datestrtodate_sql, 912 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 913 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 914 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 915 exp.DateTrunc: lambda self, e: self.func( 916 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 917 ), 918 exp.FromTimeZone: lambda self, e: self.func( 919 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 920 ), 921 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 922 exp.GroupConcat: lambda self, e: groupconcat_sql( 923 self, e, func_name="STRING_AGG", within_group=False 924 ), 925 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 926 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 927 exp.If: if_sql(false_value="NULL"), 928 exp.ILike: no_ilike_sql, 929 exp.IntDiv: rename_func("DIV"), 930 exp.Int64: rename_func("INT64"), 931 exp.JSONExtract: _json_extract_sql, 932 exp.JSONExtractArray: _json_extract_sql, 933 exp.JSONExtractScalar: _json_extract_sql, 934 exp.JSONFormat: rename_func("TO_JSON_STRING"), 935 exp.Levenshtein: _levenshtein_sql, 936 exp.Max: max_or_greatest, 937 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 938 exp.MD5Digest: rename_func("MD5"), 939 exp.Min: min_or_least, 940 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 941 exp.RegexpExtract: lambda self, e: self.func( 942 "REGEXP_EXTRACT", 943 e.this, 944 e.expression, 945 e.args.get("position"), 946 e.args.get("occurrence"), 947 ), 948 exp.RegexpExtractAll: lambda self, e: self.func( 949 "REGEXP_EXTRACT_ALL", e.this, e.expression 950 ), 951 exp.RegexpReplace: regexp_replace_sql, 952 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 953 exp.ReturnsProperty: _returnsproperty_sql, 954 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 955 exp.Select: transforms.preprocess( 956 [ 957 transforms.explode_projection_to_unnest(), 958 transforms.unqualify_unnest, 959 transforms.eliminate_distinct_on, 960 _alias_ordered_group, 961 transforms.eliminate_semi_and_anti_joins, 962 ] 963 ), 964 exp.SHA: rename_func("SHA1"), 965 exp.SHA2: sha256_sql, 966 exp.StabilityProperty: lambda self, e: ( 967 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 968 ), 969 exp.String: rename_func("STRING"), 970 exp.StrPosition: lambda self, e: ( 971 strposition_sql( 972 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 973 ) 974 ), 975 exp.StrToDate: _str_to_datetime_sql, 976 exp.StrToTime: _str_to_datetime_sql, 977 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 978 exp.TimeFromParts: rename_func("TIME"), 979 exp.TimestampFromParts: rename_func("DATETIME"), 980 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 981 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 982 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 983 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 984 exp.TimeStrToTime: timestrtotime_sql, 985 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 986 exp.TsOrDsAdd: _ts_or_ds_add_sql, 987 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 988 exp.TsOrDsToTime: rename_func("TIME"), 989 exp.TsOrDsToDatetime: rename_func("DATETIME"), 990 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 991 exp.Unhex: rename_func("FROM_HEX"), 992 exp.UnixDate: rename_func("UNIX_DATE"), 993 exp.UnixToTime: _unix_to_time_sql, 994 exp.Uuid: lambda *_: "GENERATE_UUID()", 995 exp.Values: _derived_table_values_to_unnest, 996 exp.VariancePop: rename_func("VAR_POP"), 997 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 998 } 999 1000 SUPPORTED_JSON_PATH_PARTS = { 1001 exp.JSONPathKey, 1002 exp.JSONPathRoot, 1003 exp.JSONPathSubscript, 1004 } 1005 1006 TYPE_MAPPING = { 1007 **generator.Generator.TYPE_MAPPING, 1008 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1009 exp.DataType.Type.BIGINT: "INT64", 1010 exp.DataType.Type.BINARY: "BYTES", 1011 exp.DataType.Type.BLOB: "BYTES", 1012 exp.DataType.Type.BOOLEAN: "BOOL", 1013 exp.DataType.Type.CHAR: "STRING", 1014 exp.DataType.Type.DECIMAL: "NUMERIC", 1015 exp.DataType.Type.DOUBLE: "FLOAT64", 1016 exp.DataType.Type.FLOAT: "FLOAT64", 1017 exp.DataType.Type.INT: "INT64", 1018 exp.DataType.Type.NCHAR: "STRING", 1019 exp.DataType.Type.NVARCHAR: "STRING", 1020 exp.DataType.Type.SMALLINT: "INT64", 1021 exp.DataType.Type.TEXT: "STRING", 1022 exp.DataType.Type.TIMESTAMP: "DATETIME", 1023 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1024 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1025 exp.DataType.Type.TINYINT: "INT64", 1026 exp.DataType.Type.ROWVERSION: "BYTES", 1027 exp.DataType.Type.UUID: "STRING", 1028 exp.DataType.Type.VARBINARY: "BYTES", 1029 exp.DataType.Type.VARCHAR: "STRING", 1030 exp.DataType.Type.VARIANT: "ANY TYPE", 1031 } 1032 1033 PROPERTIES_LOCATION = { 1034 **generator.Generator.PROPERTIES_LOCATION, 1035 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1036 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1037 } 1038 1039 # WINDOW comes after QUALIFY 1040 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1041 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1042 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1043 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1044 } 1045 1046 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1047 RESERVED_KEYWORDS = { 1048 "all", 1049 "and", 1050 "any", 1051 "array", 1052 "as", 1053 "asc", 1054 "assert_rows_modified", 1055 "at", 1056 "between", 1057 "by", 1058 "case", 1059 "cast", 1060 "collate", 1061 "contains", 1062 "create", 1063 "cross", 1064 "cube", 1065 "current", 1066 "default", 1067 "define", 1068 "desc", 1069 "distinct", 1070 "else", 1071 "end", 1072 "enum", 1073 "escape", 1074 "except", 1075 "exclude", 1076 "exists", 1077 "extract", 1078 "false", 1079 "fetch", 1080 "following", 1081 "for", 1082 "from", 1083 "full", 1084 "group", 1085 "grouping", 1086 "groups", 1087 "hash", 1088 "having", 1089 "if", 1090 "ignore", 1091 "in", 1092 "inner", 1093 "intersect", 1094 "interval", 1095 "into", 1096 "is", 1097 "join", 1098 "lateral", 1099 "left", 1100 "like", 1101 "limit", 1102 "lookup", 1103 "merge", 1104 "natural", 1105 "new", 1106 "no", 1107 "not", 1108 "null", 1109 "nulls", 1110 "of", 1111 "on", 1112 "or", 1113 "order", 1114 "outer", 1115 "over", 1116 "partition", 1117 "preceding", 1118 "proto", 1119 "qualify", 1120 "range", 1121 "recursive", 1122 "respect", 1123 "right", 1124 "rollup", 1125 "rows", 1126 "select", 1127 "set", 1128 "some", 1129 "struct", 1130 "tablesample", 1131 "then", 1132 "to", 1133 "treat", 1134 "true", 1135 "unbounded", 1136 "union", 1137 "unnest", 1138 "using", 1139 "when", 1140 "where", 1141 "window", 1142 "with", 1143 "within", 1144 } 1145 1146 def mod_sql(self, expression: exp.Mod) -> str: 1147 this = expression.this 1148 expr = expression.expression 1149 return self.func( 1150 "MOD", 1151 this.unnest() if isinstance(this, exp.Paren) else this, 1152 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1153 ) 1154 1155 def column_parts(self, expression: exp.Column) -> str: 1156 if expression.meta.get("quoted_column"): 1157 # If a column reference is of the form `dataset.table`.name, we need 1158 # to preserve the quoted table path, otherwise the reference breaks 1159 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1160 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1161 return f"{table_path}.{self.sql(expression, 'this')}" 1162 1163 return super().column_parts(expression) 1164 1165 def table_parts(self, expression: exp.Table) -> str: 1166 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1167 # we need to make sure the correct quoting is used in each case. 1168 # 1169 # For example, if there is a CTE x that clashes with a schema name, then the former will 1170 # return the table y in that schema, whereas the latter will return the CTE's y column: 1171 # 1172 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1173 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1174 if expression.meta.get("quoted_table"): 1175 table_parts = ".".join(p.name for p in expression.parts) 1176 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1177 1178 return super().table_parts(expression) 1179 1180 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1181 this = expression.this 1182 if isinstance(this, exp.TsOrDsToDatetime): 1183 func_name = "FORMAT_DATETIME" 1184 elif isinstance(this, exp.TsOrDsToTimestamp): 1185 func_name = "FORMAT_TIMESTAMP" 1186 else: 1187 func_name = "FORMAT_DATE" 1188 1189 time_expr = ( 1190 this 1191 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1192 else expression 1193 ) 1194 return self.func( 1195 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1196 ) 1197 1198 def eq_sql(self, expression: exp.EQ) -> str: 1199 # Operands of = cannot be NULL in BigQuery 1200 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1201 if not isinstance(expression.parent, exp.Update): 1202 return "NULL" 1203 1204 return self.binary(expression, "=") 1205 1206 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1207 parent = expression.parent 1208 1209 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1210 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1211 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1212 return self.func( 1213 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1214 ) 1215 1216 return super().attimezone_sql(expression) 1217 1218 def trycast_sql(self, expression: exp.TryCast) -> str: 1219 return self.cast_sql(expression, safe_prefix="SAFE_") 1220 1221 def bracket_sql(self, expression: exp.Bracket) -> str: 1222 this = expression.this 1223 expressions = expression.expressions 1224 1225 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1226 arg = expressions[0] 1227 if arg.type is None: 1228 from sqlglot.optimizer.annotate_types import annotate_types 1229 1230 arg = annotate_types(arg) 1231 1232 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1233 # BQ doesn't support bracket syntax with string values for structs 1234 return f"{self.sql(this)}.{arg.name}" 1235 1236 expressions_sql = self.expressions(expression, flat=True) 1237 offset = expression.args.get("offset") 1238 1239 if offset == 0: 1240 expressions_sql = f"OFFSET({expressions_sql})" 1241 elif offset == 1: 1242 expressions_sql = f"ORDINAL({expressions_sql})" 1243 elif offset is not None: 1244 self.unsupported(f"Unsupported array offset: {offset}") 1245 1246 if expression.args.get("safe"): 1247 expressions_sql = f"SAFE_{expressions_sql}" 1248 1249 return f"{self.sql(this)}[{expressions_sql}]" 1250 1251 def in_unnest_op(self, expression: exp.Unnest) -> str: 1252 return self.sql(expression) 1253 1254 def version_sql(self, expression: exp.Version) -> str: 1255 if expression.name == "TIMESTAMP": 1256 expression.set("this", "SYSTEM_TIME") 1257 return super().version_sql(expression) 1258 1259 def contains_sql(self, expression: exp.Contains) -> str: 1260 this = expression.this 1261 expr = expression.expression 1262 1263 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1264 this = this.this 1265 expr = expr.this 1266 1267 return self.func("CONTAINS_SUBSTR", this, expr) 1268 1269 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1270 this = expression.this 1271 1272 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1273 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1274 # because they aren't literals and so the above syntax is invalid BigQuery. 1275 if isinstance(this, exp.Array): 1276 elem = seq_get(this.expressions, 0) 1277 if not (elem and elem.find(exp.Query)): 1278 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1279 1280 return super().cast_sql(expression, safe_prefix=safe_prefix)
356class BigQuery(Dialect): 357 WEEK_OFFSET = -1 358 UNNEST_COLUMN_ONLY = True 359 SUPPORTS_USER_DEFINED_TYPES = False 360 SUPPORTS_SEMI_ANTI_JOIN = False 361 LOG_BASE_FIRST = False 362 HEX_LOWERCASE = True 363 FORCE_EARLY_ALIAS_REF_EXPANSION = True 364 PRESERVE_ORIGINAL_NAMES = True 365 HEX_STRING_IS_INTEGER_TYPE = True 366 367 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 368 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 369 370 # bigquery udfs are case sensitive 371 NORMALIZE_FUNCTIONS = False 372 373 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 374 TIME_MAPPING = { 375 "%D": "%m/%d/%y", 376 "%E6S": "%S.%f", 377 "%e": "%-d", 378 } 379 380 FORMAT_MAPPING = { 381 "DD": "%d", 382 "MM": "%m", 383 "MON": "%b", 384 "MONTH": "%B", 385 "YYYY": "%Y", 386 "YY": "%y", 387 "HH": "%I", 388 "HH12": "%I", 389 "HH24": "%H", 390 "MI": "%M", 391 "SS": "%S", 392 "SSSSS": "%f", 393 "TZH": "%z", 394 } 395 396 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 397 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 398 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 399 400 # All set operations require either a DISTINCT or ALL specifier 401 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 402 403 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 404 TYPE_TO_EXPRESSIONS = { 405 **Dialect.TYPE_TO_EXPRESSIONS, 406 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 407 } 408 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 409 410 ANNOTATORS = { 411 **Dialect.ANNOTATORS, 412 **{ 413 expr_type: annotate_with_type_lambda(data_type) 414 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 415 for expr_type in expressions 416 }, 417 **{ 418 expr_type: lambda self, e: _annotate_math_functions(self, e) 419 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 420 }, 421 **{ 422 expr_type: lambda self, e: self._annotate_by_args(e, "this") 423 for expr_type in ( 424 exp.Left, 425 exp.Right, 426 exp.Lower, 427 exp.Upper, 428 exp.Pad, 429 exp.Trim, 430 exp.RegexpExtract, 431 exp.RegexpReplace, 432 exp.Repeat, 433 exp.Substring, 434 ) 435 }, 436 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 437 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 438 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 439 } 440 441 def normalize_identifier(self, expression: E) -> E: 442 if ( 443 isinstance(expression, exp.Identifier) 444 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 445 ): 446 parent = expression.parent 447 while isinstance(parent, exp.Dot): 448 parent = parent.parent 449 450 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 451 # by default. The following check uses a heuristic to detect tables based on whether 452 # they are qualified. This should generally be correct, because tables in BigQuery 453 # must be qualified with at least a dataset, unless @@dataset_id is set. 454 case_sensitive = ( 455 isinstance(parent, exp.UserDefinedFunction) 456 or ( 457 isinstance(parent, exp.Table) 458 and parent.db 459 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 460 ) 461 or expression.meta.get("is_table") 462 ) 463 if not case_sensitive: 464 expression.set("this", expression.this.lower()) 465 466 return expression 467 468 class Tokenizer(tokens.Tokenizer): 469 QUOTES = ["'", '"', '"""', "'''"] 470 COMMENTS = ["--", "#", ("/*", "*/")] 471 IDENTIFIERS = ["`"] 472 STRING_ESCAPES = ["\\"] 473 474 HEX_STRINGS = [("0x", ""), ("0X", "")] 475 476 BYTE_STRINGS = [ 477 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 478 ] 479 480 RAW_STRINGS = [ 481 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 482 ] 483 484 KEYWORDS = { 485 **tokens.Tokenizer.KEYWORDS, 486 "ANY TYPE": TokenType.VARIANT, 487 "BEGIN": TokenType.COMMAND, 488 "BEGIN TRANSACTION": TokenType.BEGIN, 489 "BYTEINT": TokenType.INT, 490 "BYTES": TokenType.BINARY, 491 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 492 "DATETIME": TokenType.TIMESTAMP, 493 "DECLARE": TokenType.COMMAND, 494 "ELSEIF": TokenType.COMMAND, 495 "EXCEPTION": TokenType.COMMAND, 496 "EXPORT": TokenType.EXPORT, 497 "FLOAT64": TokenType.DOUBLE, 498 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 499 "MODEL": TokenType.MODEL, 500 "NOT DETERMINISTIC": TokenType.VOLATILE, 501 "RECORD": TokenType.STRUCT, 502 "TIMESTAMP": TokenType.TIMESTAMPTZ, 503 } 504 KEYWORDS.pop("DIV") 505 KEYWORDS.pop("VALUES") 506 KEYWORDS.pop("/*+") 507 508 class Parser(parser.Parser): 509 PREFIXED_PIVOT_COLUMNS = True 510 LOG_DEFAULTS_TO_LN = True 511 SUPPORTS_IMPLICIT_UNNEST = True 512 513 FUNCTIONS = { 514 **parser.Parser.FUNCTIONS, 515 "CONTAINS_SUBSTR": _build_contains_substring, 516 "DATE": _build_date, 517 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 518 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 519 "DATE_TRUNC": lambda args: exp.DateTrunc( 520 unit=exp.Literal.string(str(seq_get(args, 1))), 521 this=seq_get(args, 0), 522 zone=seq_get(args, 2), 523 ), 524 "DATETIME": _build_datetime, 525 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 526 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 527 "DIV": binary_from_function(exp.IntDiv), 528 "EDIT_DISTANCE": _build_levenshtein, 529 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 530 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 531 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 532 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 533 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 534 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 535 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 536 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 537 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 538 "MD5": exp.MD5Digest.from_arg_list, 539 "TO_HEX": _build_to_hex, 540 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 541 [seq_get(args, 1), seq_get(args, 0)] 542 ), 543 "PARSE_TIMESTAMP": _build_parse_timestamp, 544 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 545 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 546 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 547 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 548 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 549 ), 550 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 551 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 552 "SPLIT": lambda args: exp.Split( 553 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 554 this=seq_get(args, 0), 555 expression=seq_get(args, 1) or exp.Literal.string(","), 556 ), 557 "STRPOS": exp.StrPosition.from_arg_list, 558 "TIME": _build_time, 559 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 560 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 561 "TIMESTAMP": _build_timestamp, 562 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 563 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 564 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 565 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 566 ), 567 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 568 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 569 ), 570 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 571 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 572 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 573 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 574 } 575 576 FUNCTION_PARSERS = { 577 **parser.Parser.FUNCTION_PARSERS, 578 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 579 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 580 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 581 } 582 FUNCTION_PARSERS.pop("TRIM") 583 584 NO_PAREN_FUNCTIONS = { 585 **parser.Parser.NO_PAREN_FUNCTIONS, 586 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 587 } 588 589 NESTED_TYPE_TOKENS = { 590 *parser.Parser.NESTED_TYPE_TOKENS, 591 TokenType.TABLE, 592 } 593 594 PROPERTY_PARSERS = { 595 **parser.Parser.PROPERTY_PARSERS, 596 "NOT DETERMINISTIC": lambda self: self.expression( 597 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 598 ), 599 "OPTIONS": lambda self: self._parse_with_property(), 600 } 601 602 CONSTRAINT_PARSERS = { 603 **parser.Parser.CONSTRAINT_PARSERS, 604 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 605 } 606 607 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 608 RANGE_PARSERS.pop(TokenType.OVERLAPS) 609 610 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 611 612 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 613 614 STATEMENT_PARSERS = { 615 **parser.Parser.STATEMENT_PARSERS, 616 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 617 TokenType.END: lambda self: self._parse_as_command(self._prev), 618 TokenType.FOR: lambda self: self._parse_for_in(), 619 TokenType.EXPORT: lambda self: self._parse_export_data(), 620 } 621 622 BRACKET_OFFSETS = { 623 "OFFSET": (0, False), 624 "ORDINAL": (1, False), 625 "SAFE_OFFSET": (0, True), 626 "SAFE_ORDINAL": (1, True), 627 } 628 629 def _parse_for_in(self) -> exp.ForIn: 630 this = self._parse_range() 631 self._match_text_seq("DO") 632 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 633 634 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 635 this = super()._parse_table_part(schema=schema) or self._parse_number() 636 637 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 638 if isinstance(this, exp.Identifier): 639 table_name = this.name 640 while self._match(TokenType.DASH, advance=False) and self._next: 641 start = self._curr 642 while self._is_connected() and not self._match_set( 643 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 644 ): 645 self._advance() 646 647 table_name += self._find_sql(start, self._prev) 648 649 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 650 elif isinstance(this, exp.Literal): 651 table_name = this.name 652 653 if self._is_connected() and self._parse_var(any_token=True): 654 table_name += self._prev.text 655 656 this = exp.Identifier(this=table_name, quoted=True) 657 658 return this 659 660 def _parse_table_parts( 661 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 662 ) -> exp.Table: 663 table = super()._parse_table_parts( 664 schema=schema, is_db_reference=is_db_reference, wildcard=True 665 ) 666 667 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 668 if not table.catalog: 669 if table.db: 670 parts = table.db.split(".") 671 if len(parts) == 2 and not table.args["db"].quoted: 672 table.set("catalog", exp.Identifier(this=parts[0])) 673 table.set("db", exp.Identifier(this=parts[1])) 674 else: 675 parts = table.name.split(".") 676 if len(parts) == 2 and not table.this.quoted: 677 table.set("db", exp.Identifier(this=parts[0])) 678 table.set("this", exp.Identifier(this=parts[1])) 679 680 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 681 alias = table.this 682 catalog, db, this, *rest = ( 683 exp.to_identifier(p, quoted=True) 684 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 685 ) 686 687 if rest and this: 688 this = exp.Dot.build([this, *rest]) # type: ignore 689 690 table = exp.Table( 691 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 692 ) 693 table.meta["quoted_table"] = True 694 else: 695 alias = None 696 697 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 698 # dataset, so if the project identifier is omitted we need to fix the ast so that 699 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 700 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 701 # views, because it would seem like the "catalog" part is set, when it'd actually 702 # be the region/dataset. Merging the two identifiers into a single one is done to 703 # avoid producing a 4-part Table reference, which would cause issues in the schema 704 # module, when there are 3-part table names mixed with information schema views. 705 # 706 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 707 table_parts = table.parts 708 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 709 # We need to alias the table here to avoid breaking existing qualified columns. 710 # This is expected to be safe, because if there's an actual alias coming up in 711 # the token stream, it will overwrite this one. If there isn't one, we are only 712 # exposing the name that can be used to reference the view explicitly (a no-op). 713 exp.alias_( 714 table, 715 t.cast(exp.Identifier, alias or table_parts[-1]), 716 table=True, 717 copy=False, 718 ) 719 720 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 721 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 722 table.set("db", seq_get(table_parts, -3)) 723 table.set("catalog", seq_get(table_parts, -4)) 724 725 return table 726 727 def _parse_column(self) -> t.Optional[exp.Expression]: 728 column = super()._parse_column() 729 if isinstance(column, exp.Column): 730 parts = column.parts 731 if any("." in p.name for p in parts): 732 catalog, db, table, this, *rest = ( 733 exp.to_identifier(p, quoted=True) 734 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 735 ) 736 737 if rest and this: 738 this = exp.Dot.build([this, *rest]) # type: ignore 739 740 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 741 column.meta["quoted_column"] = True 742 743 return column 744 745 @t.overload 746 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 747 748 @t.overload 749 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 750 751 def _parse_json_object(self, agg=False): 752 json_object = super()._parse_json_object() 753 array_kv_pair = seq_get(json_object.expressions, 0) 754 755 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 756 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 757 if ( 758 array_kv_pair 759 and isinstance(array_kv_pair.this, exp.Array) 760 and isinstance(array_kv_pair.expression, exp.Array) 761 ): 762 keys = array_kv_pair.this.expressions 763 values = array_kv_pair.expression.expressions 764 765 json_object.set( 766 "expressions", 767 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 768 ) 769 770 return json_object 771 772 def _parse_bracket( 773 self, this: t.Optional[exp.Expression] = None 774 ) -> t.Optional[exp.Expression]: 775 bracket = super()._parse_bracket(this) 776 777 if this is bracket: 778 return bracket 779 780 if isinstance(bracket, exp.Bracket): 781 for expression in bracket.expressions: 782 name = expression.name.upper() 783 784 if name not in self.BRACKET_OFFSETS: 785 break 786 787 offset, safe = self.BRACKET_OFFSETS[name] 788 bracket.set("offset", offset) 789 bracket.set("safe", safe) 790 expression.replace(expression.expressions[0]) 791 792 return bracket 793 794 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 795 unnest = super()._parse_unnest(with_alias=with_alias) 796 797 if not unnest: 798 return None 799 800 unnest_expr = seq_get(unnest.expressions, 0) 801 if unnest_expr: 802 from sqlglot.optimizer.annotate_types import annotate_types 803 804 unnest_expr = annotate_types(unnest_expr) 805 806 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 807 # in contrast to other dialects such as DuckDB which flattens only the array by default 808 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 809 array_elem.is_type(exp.DataType.Type.STRUCT) 810 for array_elem in unnest_expr._type.expressions 811 ): 812 unnest.set("explode_array", True) 813 814 return unnest 815 816 def _parse_make_interval(self) -> exp.MakeInterval: 817 expr = exp.MakeInterval() 818 819 for arg_key in expr.arg_types: 820 value = self._parse_lambda() 821 822 if not value: 823 break 824 825 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 826 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 827 if isinstance(value, exp.Kwarg): 828 arg_key = value.this.name 829 830 expr.set(arg_key, value) 831 832 self._match(TokenType.COMMA) 833 834 return expr 835 836 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 837 expr = self.expression( 838 exp.FeaturesAtTime, 839 this=(self._match(TokenType.TABLE) and self._parse_table()) 840 or self._parse_select(nested=True), 841 ) 842 843 while self._match(TokenType.COMMA): 844 arg = self._parse_lambda() 845 846 # Get the LHS of the Kwarg and set the arg to that value, e.g 847 # "num_rows => 1" sets the expr's `num_rows` arg 848 if arg: 849 expr.set(arg.this.name, arg) 850 851 return expr 852 853 def _parse_export_data(self) -> exp.Export: 854 self._match_text_seq("DATA") 855 856 return self.expression( 857 exp.Export, 858 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 859 options=self._parse_properties(), 860 this=self._match_text_seq("AS") and self._parse_select(), 861 ) 862 863 class Generator(generator.Generator): 864 INTERVAL_ALLOWS_PLURAL_FORM = False 865 JOIN_HINTS = False 866 QUERY_HINTS = False 867 TABLE_HINTS = False 868 LIMIT_FETCH = "LIMIT" 869 RENAME_TABLE_WITH_DB = False 870 NVL2_SUPPORTED = False 871 UNNEST_WITH_ORDINALITY = False 872 COLLATE_IS_FUNC = True 873 LIMIT_ONLY_LITERALS = True 874 SUPPORTS_TABLE_ALIAS_COLUMNS = False 875 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 876 JSON_KEY_VALUE_PAIR_SEP = "," 877 NULL_ORDERING_SUPPORTED = False 878 IGNORE_NULLS_IN_FUNC = True 879 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 880 CAN_IMPLEMENT_ARRAY_ANY = True 881 SUPPORTS_TO_NUMBER = False 882 NAMED_PLACEHOLDER_TOKEN = "@" 883 HEX_FUNC = "TO_HEX" 884 WITH_PROPERTIES_PREFIX = "OPTIONS" 885 SUPPORTS_EXPLODING_PROJECTIONS = False 886 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 887 SUPPORTS_UNIX_SECONDS = True 888 889 TRANSFORMS = { 890 **generator.Generator.TRANSFORMS, 891 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 892 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 893 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 894 exp.Array: inline_array_unless_query, 895 exp.ArrayContains: _array_contains_sql, 896 exp.ArrayFilter: filter_array_using_unnest, 897 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 898 exp.CollateProperty: lambda self, e: ( 899 f"DEFAULT COLLATE {self.sql(e, 'this')}" 900 if e.args.get("default") 901 else f"COLLATE {self.sql(e, 'this')}" 902 ), 903 exp.Commit: lambda *_: "COMMIT TRANSACTION", 904 exp.CountIf: rename_func("COUNTIF"), 905 exp.Create: _create_sql, 906 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 907 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 908 exp.DateDiff: lambda self, e: self.func( 909 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 910 ), 911 exp.DateFromParts: rename_func("DATE"), 912 exp.DateStrToDate: datestrtodate_sql, 913 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 914 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 915 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 916 exp.DateTrunc: lambda self, e: self.func( 917 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 918 ), 919 exp.FromTimeZone: lambda self, e: self.func( 920 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 921 ), 922 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 923 exp.GroupConcat: lambda self, e: groupconcat_sql( 924 self, e, func_name="STRING_AGG", within_group=False 925 ), 926 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 927 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 928 exp.If: if_sql(false_value="NULL"), 929 exp.ILike: no_ilike_sql, 930 exp.IntDiv: rename_func("DIV"), 931 exp.Int64: rename_func("INT64"), 932 exp.JSONExtract: _json_extract_sql, 933 exp.JSONExtractArray: _json_extract_sql, 934 exp.JSONExtractScalar: _json_extract_sql, 935 exp.JSONFormat: rename_func("TO_JSON_STRING"), 936 exp.Levenshtein: _levenshtein_sql, 937 exp.Max: max_or_greatest, 938 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 939 exp.MD5Digest: rename_func("MD5"), 940 exp.Min: min_or_least, 941 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 942 exp.RegexpExtract: lambda self, e: self.func( 943 "REGEXP_EXTRACT", 944 e.this, 945 e.expression, 946 e.args.get("position"), 947 e.args.get("occurrence"), 948 ), 949 exp.RegexpExtractAll: lambda self, e: self.func( 950 "REGEXP_EXTRACT_ALL", e.this, e.expression 951 ), 952 exp.RegexpReplace: regexp_replace_sql, 953 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 954 exp.ReturnsProperty: _returnsproperty_sql, 955 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 956 exp.Select: transforms.preprocess( 957 [ 958 transforms.explode_projection_to_unnest(), 959 transforms.unqualify_unnest, 960 transforms.eliminate_distinct_on, 961 _alias_ordered_group, 962 transforms.eliminate_semi_and_anti_joins, 963 ] 964 ), 965 exp.SHA: rename_func("SHA1"), 966 exp.SHA2: sha256_sql, 967 exp.StabilityProperty: lambda self, e: ( 968 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 969 ), 970 exp.String: rename_func("STRING"), 971 exp.StrPosition: lambda self, e: ( 972 strposition_sql( 973 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 974 ) 975 ), 976 exp.StrToDate: _str_to_datetime_sql, 977 exp.StrToTime: _str_to_datetime_sql, 978 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 979 exp.TimeFromParts: rename_func("TIME"), 980 exp.TimestampFromParts: rename_func("DATETIME"), 981 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 982 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 983 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 984 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 985 exp.TimeStrToTime: timestrtotime_sql, 986 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 987 exp.TsOrDsAdd: _ts_or_ds_add_sql, 988 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 989 exp.TsOrDsToTime: rename_func("TIME"), 990 exp.TsOrDsToDatetime: rename_func("DATETIME"), 991 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 992 exp.Unhex: rename_func("FROM_HEX"), 993 exp.UnixDate: rename_func("UNIX_DATE"), 994 exp.UnixToTime: _unix_to_time_sql, 995 exp.Uuid: lambda *_: "GENERATE_UUID()", 996 exp.Values: _derived_table_values_to_unnest, 997 exp.VariancePop: rename_func("VAR_POP"), 998 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 999 } 1000 1001 SUPPORTED_JSON_PATH_PARTS = { 1002 exp.JSONPathKey, 1003 exp.JSONPathRoot, 1004 exp.JSONPathSubscript, 1005 } 1006 1007 TYPE_MAPPING = { 1008 **generator.Generator.TYPE_MAPPING, 1009 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1010 exp.DataType.Type.BIGINT: "INT64", 1011 exp.DataType.Type.BINARY: "BYTES", 1012 exp.DataType.Type.BLOB: "BYTES", 1013 exp.DataType.Type.BOOLEAN: "BOOL", 1014 exp.DataType.Type.CHAR: "STRING", 1015 exp.DataType.Type.DECIMAL: "NUMERIC", 1016 exp.DataType.Type.DOUBLE: "FLOAT64", 1017 exp.DataType.Type.FLOAT: "FLOAT64", 1018 exp.DataType.Type.INT: "INT64", 1019 exp.DataType.Type.NCHAR: "STRING", 1020 exp.DataType.Type.NVARCHAR: "STRING", 1021 exp.DataType.Type.SMALLINT: "INT64", 1022 exp.DataType.Type.TEXT: "STRING", 1023 exp.DataType.Type.TIMESTAMP: "DATETIME", 1024 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1025 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1026 exp.DataType.Type.TINYINT: "INT64", 1027 exp.DataType.Type.ROWVERSION: "BYTES", 1028 exp.DataType.Type.UUID: "STRING", 1029 exp.DataType.Type.VARBINARY: "BYTES", 1030 exp.DataType.Type.VARCHAR: "STRING", 1031 exp.DataType.Type.VARIANT: "ANY TYPE", 1032 } 1033 1034 PROPERTIES_LOCATION = { 1035 **generator.Generator.PROPERTIES_LOCATION, 1036 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1037 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1038 } 1039 1040 # WINDOW comes after QUALIFY 1041 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1042 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1043 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1044 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1045 } 1046 1047 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1048 RESERVED_KEYWORDS = { 1049 "all", 1050 "and", 1051 "any", 1052 "array", 1053 "as", 1054 "asc", 1055 "assert_rows_modified", 1056 "at", 1057 "between", 1058 "by", 1059 "case", 1060 "cast", 1061 "collate", 1062 "contains", 1063 "create", 1064 "cross", 1065 "cube", 1066 "current", 1067 "default", 1068 "define", 1069 "desc", 1070 "distinct", 1071 "else", 1072 "end", 1073 "enum", 1074 "escape", 1075 "except", 1076 "exclude", 1077 "exists", 1078 "extract", 1079 "false", 1080 "fetch", 1081 "following", 1082 "for", 1083 "from", 1084 "full", 1085 "group", 1086 "grouping", 1087 "groups", 1088 "hash", 1089 "having", 1090 "if", 1091 "ignore", 1092 "in", 1093 "inner", 1094 "intersect", 1095 "interval", 1096 "into", 1097 "is", 1098 "join", 1099 "lateral", 1100 "left", 1101 "like", 1102 "limit", 1103 "lookup", 1104 "merge", 1105 "natural", 1106 "new", 1107 "no", 1108 "not", 1109 "null", 1110 "nulls", 1111 "of", 1112 "on", 1113 "or", 1114 "order", 1115 "outer", 1116 "over", 1117 "partition", 1118 "preceding", 1119 "proto", 1120 "qualify", 1121 "range", 1122 "recursive", 1123 "respect", 1124 "right", 1125 "rollup", 1126 "rows", 1127 "select", 1128 "set", 1129 "some", 1130 "struct", 1131 "tablesample", 1132 "then", 1133 "to", 1134 "treat", 1135 "true", 1136 "unbounded", 1137 "union", 1138 "unnest", 1139 "using", 1140 "when", 1141 "where", 1142 "window", 1143 "with", 1144 "within", 1145 } 1146 1147 def mod_sql(self, expression: exp.Mod) -> str: 1148 this = expression.this 1149 expr = expression.expression 1150 return self.func( 1151 "MOD", 1152 this.unnest() if isinstance(this, exp.Paren) else this, 1153 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1154 ) 1155 1156 def column_parts(self, expression: exp.Column) -> str: 1157 if expression.meta.get("quoted_column"): 1158 # If a column reference is of the form `dataset.table`.name, we need 1159 # to preserve the quoted table path, otherwise the reference breaks 1160 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1161 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1162 return f"{table_path}.{self.sql(expression, 'this')}" 1163 1164 return super().column_parts(expression) 1165 1166 def table_parts(self, expression: exp.Table) -> str: 1167 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1168 # we need to make sure the correct quoting is used in each case. 1169 # 1170 # For example, if there is a CTE x that clashes with a schema name, then the former will 1171 # return the table y in that schema, whereas the latter will return the CTE's y column: 1172 # 1173 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1174 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1175 if expression.meta.get("quoted_table"): 1176 table_parts = ".".join(p.name for p in expression.parts) 1177 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1178 1179 return super().table_parts(expression) 1180 1181 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1182 this = expression.this 1183 if isinstance(this, exp.TsOrDsToDatetime): 1184 func_name = "FORMAT_DATETIME" 1185 elif isinstance(this, exp.TsOrDsToTimestamp): 1186 func_name = "FORMAT_TIMESTAMP" 1187 else: 1188 func_name = "FORMAT_DATE" 1189 1190 time_expr = ( 1191 this 1192 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1193 else expression 1194 ) 1195 return self.func( 1196 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1197 ) 1198 1199 def eq_sql(self, expression: exp.EQ) -> str: 1200 # Operands of = cannot be NULL in BigQuery 1201 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1202 if not isinstance(expression.parent, exp.Update): 1203 return "NULL" 1204 1205 return self.binary(expression, "=") 1206 1207 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1208 parent = expression.parent 1209 1210 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1211 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1212 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1213 return self.func( 1214 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1215 ) 1216 1217 return super().attimezone_sql(expression) 1218 1219 def trycast_sql(self, expression: exp.TryCast) -> str: 1220 return self.cast_sql(expression, safe_prefix="SAFE_") 1221 1222 def bracket_sql(self, expression: exp.Bracket) -> str: 1223 this = expression.this 1224 expressions = expression.expressions 1225 1226 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1227 arg = expressions[0] 1228 if arg.type is None: 1229 from sqlglot.optimizer.annotate_types import annotate_types 1230 1231 arg = annotate_types(arg) 1232 1233 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1234 # BQ doesn't support bracket syntax with string values for structs 1235 return f"{self.sql(this)}.{arg.name}" 1236 1237 expressions_sql = self.expressions(expression, flat=True) 1238 offset = expression.args.get("offset") 1239 1240 if offset == 0: 1241 expressions_sql = f"OFFSET({expressions_sql})" 1242 elif offset == 1: 1243 expressions_sql = f"ORDINAL({expressions_sql})" 1244 elif offset is not None: 1245 self.unsupported(f"Unsupported array offset: {offset}") 1246 1247 if expression.args.get("safe"): 1248 expressions_sql = f"SAFE_{expressions_sql}" 1249 1250 return f"{self.sql(this)}[{expressions_sql}]" 1251 1252 def in_unnest_op(self, expression: exp.Unnest) -> str: 1253 return self.sql(expression) 1254 1255 def version_sql(self, expression: exp.Version) -> str: 1256 if expression.name == "TIMESTAMP": 1257 expression.set("this", "SYSTEM_TIME") 1258 return super().version_sql(expression) 1259 1260 def contains_sql(self, expression: exp.Contains) -> str: 1261 this = expression.this 1262 expr = expression.expression 1263 1264 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1265 this = this.this 1266 expr = expr.this 1267 1268 return self.func("CONTAINS_SUBSTR", this, expr) 1269 1270 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1271 this = expression.this 1272 1273 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1274 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1275 # because they aren't literals and so the above syntax is invalid BigQuery. 1276 if isinstance(this, exp.Array): 1277 elem = seq_get(this.expressions, 0) 1278 if not (elem and elem.find(exp.Query)): 1279 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1280 1281 return super().cast_sql(expression, safe_prefix=safe_prefix)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime
formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy')
.
If empty, the corresponding trie will be constructed off of TIME_MAPPING
.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT *
queries.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
441 def normalize_identifier(self, expression: E) -> E: 442 if ( 443 isinstance(expression, exp.Identifier) 444 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 445 ): 446 parent = expression.parent 447 while isinstance(parent, exp.Dot): 448 parent = parent.parent 449 450 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 451 # by default. The following check uses a heuristic to detect tables based on whether 452 # they are qualified. This should generally be correct, because tables in BigQuery 453 # must be qualified with at least a dataset, unless @@dataset_id is set. 454 case_sensitive = ( 455 isinstance(parent, exp.UserDefinedFunction) 456 or ( 457 isinstance(parent, exp.Table) 458 and parent.db 459 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 460 ) 461 or expression.meta.get("is_table") 462 ) 463 if not case_sensitive: 464 expression.set("this", expression.this.lower()) 465 466 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO
would be resolved as foo
in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO
. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
468 class Tokenizer(tokens.Tokenizer): 469 QUOTES = ["'", '"', '"""', "'''"] 470 COMMENTS = ["--", "#", ("/*", "*/")] 471 IDENTIFIERS = ["`"] 472 STRING_ESCAPES = ["\\"] 473 474 HEX_STRINGS = [("0x", ""), ("0X", "")] 475 476 BYTE_STRINGS = [ 477 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 478 ] 479 480 RAW_STRINGS = [ 481 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 482 ] 483 484 KEYWORDS = { 485 **tokens.Tokenizer.KEYWORDS, 486 "ANY TYPE": TokenType.VARIANT, 487 "BEGIN": TokenType.COMMAND, 488 "BEGIN TRANSACTION": TokenType.BEGIN, 489 "BYTEINT": TokenType.INT, 490 "BYTES": TokenType.BINARY, 491 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 492 "DATETIME": TokenType.TIMESTAMP, 493 "DECLARE": TokenType.COMMAND, 494 "ELSEIF": TokenType.COMMAND, 495 "EXCEPTION": TokenType.COMMAND, 496 "EXPORT": TokenType.EXPORT, 497 "FLOAT64": TokenType.DOUBLE, 498 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 499 "MODEL": TokenType.MODEL, 500 "NOT DETERMINISTIC": TokenType.VOLATILE, 501 "RECORD": TokenType.STRUCT, 502 "TIMESTAMP": TokenType.TIMESTAMPTZ, 503 } 504 KEYWORDS.pop("DIV") 505 KEYWORDS.pop("VALUES") 506 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
508 class Parser(parser.Parser): 509 PREFIXED_PIVOT_COLUMNS = True 510 LOG_DEFAULTS_TO_LN = True 511 SUPPORTS_IMPLICIT_UNNEST = True 512 513 FUNCTIONS = { 514 **parser.Parser.FUNCTIONS, 515 "CONTAINS_SUBSTR": _build_contains_substring, 516 "DATE": _build_date, 517 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 518 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 519 "DATE_TRUNC": lambda args: exp.DateTrunc( 520 unit=exp.Literal.string(str(seq_get(args, 1))), 521 this=seq_get(args, 0), 522 zone=seq_get(args, 2), 523 ), 524 "DATETIME": _build_datetime, 525 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 526 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 527 "DIV": binary_from_function(exp.IntDiv), 528 "EDIT_DISTANCE": _build_levenshtein, 529 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 530 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 531 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 532 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 533 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 534 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 535 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 536 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 537 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 538 "MD5": exp.MD5Digest.from_arg_list, 539 "TO_HEX": _build_to_hex, 540 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 541 [seq_get(args, 1), seq_get(args, 0)] 542 ), 543 "PARSE_TIMESTAMP": _build_parse_timestamp, 544 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 545 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 546 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 547 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 548 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 549 ), 550 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 551 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 552 "SPLIT": lambda args: exp.Split( 553 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 554 this=seq_get(args, 0), 555 expression=seq_get(args, 1) or exp.Literal.string(","), 556 ), 557 "STRPOS": exp.StrPosition.from_arg_list, 558 "TIME": _build_time, 559 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 560 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 561 "TIMESTAMP": _build_timestamp, 562 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 563 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 564 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 565 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 566 ), 567 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 568 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 569 ), 570 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 571 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 572 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 573 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 574 } 575 576 FUNCTION_PARSERS = { 577 **parser.Parser.FUNCTION_PARSERS, 578 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 579 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 580 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 581 } 582 FUNCTION_PARSERS.pop("TRIM") 583 584 NO_PAREN_FUNCTIONS = { 585 **parser.Parser.NO_PAREN_FUNCTIONS, 586 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 587 } 588 589 NESTED_TYPE_TOKENS = { 590 *parser.Parser.NESTED_TYPE_TOKENS, 591 TokenType.TABLE, 592 } 593 594 PROPERTY_PARSERS = { 595 **parser.Parser.PROPERTY_PARSERS, 596 "NOT DETERMINISTIC": lambda self: self.expression( 597 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 598 ), 599 "OPTIONS": lambda self: self._parse_with_property(), 600 } 601 602 CONSTRAINT_PARSERS = { 603 **parser.Parser.CONSTRAINT_PARSERS, 604 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 605 } 606 607 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 608 RANGE_PARSERS.pop(TokenType.OVERLAPS) 609 610 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 611 612 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 613 614 STATEMENT_PARSERS = { 615 **parser.Parser.STATEMENT_PARSERS, 616 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 617 TokenType.END: lambda self: self._parse_as_command(self._prev), 618 TokenType.FOR: lambda self: self._parse_for_in(), 619 TokenType.EXPORT: lambda self: self._parse_export_data(), 620 } 621 622 BRACKET_OFFSETS = { 623 "OFFSET": (0, False), 624 "ORDINAL": (1, False), 625 "SAFE_OFFSET": (0, True), 626 "SAFE_ORDINAL": (1, True), 627 } 628 629 def _parse_for_in(self) -> exp.ForIn: 630 this = self._parse_range() 631 self._match_text_seq("DO") 632 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 633 634 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 635 this = super()._parse_table_part(schema=schema) or self._parse_number() 636 637 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 638 if isinstance(this, exp.Identifier): 639 table_name = this.name 640 while self._match(TokenType.DASH, advance=False) and self._next: 641 start = self._curr 642 while self._is_connected() and not self._match_set( 643 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 644 ): 645 self._advance() 646 647 table_name += self._find_sql(start, self._prev) 648 649 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 650 elif isinstance(this, exp.Literal): 651 table_name = this.name 652 653 if self._is_connected() and self._parse_var(any_token=True): 654 table_name += self._prev.text 655 656 this = exp.Identifier(this=table_name, quoted=True) 657 658 return this 659 660 def _parse_table_parts( 661 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 662 ) -> exp.Table: 663 table = super()._parse_table_parts( 664 schema=schema, is_db_reference=is_db_reference, wildcard=True 665 ) 666 667 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 668 if not table.catalog: 669 if table.db: 670 parts = table.db.split(".") 671 if len(parts) == 2 and not table.args["db"].quoted: 672 table.set("catalog", exp.Identifier(this=parts[0])) 673 table.set("db", exp.Identifier(this=parts[1])) 674 else: 675 parts = table.name.split(".") 676 if len(parts) == 2 and not table.this.quoted: 677 table.set("db", exp.Identifier(this=parts[0])) 678 table.set("this", exp.Identifier(this=parts[1])) 679 680 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 681 alias = table.this 682 catalog, db, this, *rest = ( 683 exp.to_identifier(p, quoted=True) 684 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 685 ) 686 687 if rest and this: 688 this = exp.Dot.build([this, *rest]) # type: ignore 689 690 table = exp.Table( 691 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 692 ) 693 table.meta["quoted_table"] = True 694 else: 695 alias = None 696 697 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 698 # dataset, so if the project identifier is omitted we need to fix the ast so that 699 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 700 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 701 # views, because it would seem like the "catalog" part is set, when it'd actually 702 # be the region/dataset. Merging the two identifiers into a single one is done to 703 # avoid producing a 4-part Table reference, which would cause issues in the schema 704 # module, when there are 3-part table names mixed with information schema views. 705 # 706 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 707 table_parts = table.parts 708 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 709 # We need to alias the table here to avoid breaking existing qualified columns. 710 # This is expected to be safe, because if there's an actual alias coming up in 711 # the token stream, it will overwrite this one. If there isn't one, we are only 712 # exposing the name that can be used to reference the view explicitly (a no-op). 713 exp.alias_( 714 table, 715 t.cast(exp.Identifier, alias or table_parts[-1]), 716 table=True, 717 copy=False, 718 ) 719 720 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 721 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 722 table.set("db", seq_get(table_parts, -3)) 723 table.set("catalog", seq_get(table_parts, -4)) 724 725 return table 726 727 def _parse_column(self) -> t.Optional[exp.Expression]: 728 column = super()._parse_column() 729 if isinstance(column, exp.Column): 730 parts = column.parts 731 if any("." in p.name for p in parts): 732 catalog, db, table, this, *rest = ( 733 exp.to_identifier(p, quoted=True) 734 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 735 ) 736 737 if rest and this: 738 this = exp.Dot.build([this, *rest]) # type: ignore 739 740 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 741 column.meta["quoted_column"] = True 742 743 return column 744 745 @t.overload 746 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 747 748 @t.overload 749 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 750 751 def _parse_json_object(self, agg=False): 752 json_object = super()._parse_json_object() 753 array_kv_pair = seq_get(json_object.expressions, 0) 754 755 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 756 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 757 if ( 758 array_kv_pair 759 and isinstance(array_kv_pair.this, exp.Array) 760 and isinstance(array_kv_pair.expression, exp.Array) 761 ): 762 keys = array_kv_pair.this.expressions 763 values = array_kv_pair.expression.expressions 764 765 json_object.set( 766 "expressions", 767 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 768 ) 769 770 return json_object 771 772 def _parse_bracket( 773 self, this: t.Optional[exp.Expression] = None 774 ) -> t.Optional[exp.Expression]: 775 bracket = super()._parse_bracket(this) 776 777 if this is bracket: 778 return bracket 779 780 if isinstance(bracket, exp.Bracket): 781 for expression in bracket.expressions: 782 name = expression.name.upper() 783 784 if name not in self.BRACKET_OFFSETS: 785 break 786 787 offset, safe = self.BRACKET_OFFSETS[name] 788 bracket.set("offset", offset) 789 bracket.set("safe", safe) 790 expression.replace(expression.expressions[0]) 791 792 return bracket 793 794 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 795 unnest = super()._parse_unnest(with_alias=with_alias) 796 797 if not unnest: 798 return None 799 800 unnest_expr = seq_get(unnest.expressions, 0) 801 if unnest_expr: 802 from sqlglot.optimizer.annotate_types import annotate_types 803 804 unnest_expr = annotate_types(unnest_expr) 805 806 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 807 # in contrast to other dialects such as DuckDB which flattens only the array by default 808 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 809 array_elem.is_type(exp.DataType.Type.STRUCT) 810 for array_elem in unnest_expr._type.expressions 811 ): 812 unnest.set("explode_array", True) 813 814 return unnest 815 816 def _parse_make_interval(self) -> exp.MakeInterval: 817 expr = exp.MakeInterval() 818 819 for arg_key in expr.arg_types: 820 value = self._parse_lambda() 821 822 if not value: 823 break 824 825 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 826 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 827 if isinstance(value, exp.Kwarg): 828 arg_key = value.this.name 829 830 expr.set(arg_key, value) 831 832 self._match(TokenType.COMMA) 833 834 return expr 835 836 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 837 expr = self.expression( 838 exp.FeaturesAtTime, 839 this=(self._match(TokenType.TABLE) and self._parse_table()) 840 or self._parse_select(nested=True), 841 ) 842 843 while self._match(TokenType.COMMA): 844 arg = self._parse_lambda() 845 846 # Get the LHS of the Kwarg and set the arg to that value, e.g 847 # "num_rows => 1" sets the expr's `num_rows` arg 848 if arg: 849 expr.set(arg.this.name, arg) 850 851 return expr 852 853 def _parse_export_data(self) -> exp.Export: 854 self._match_text_seq("DATA") 855 856 return self.expression( 857 exp.Export, 858 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 859 options=self._parse_properties(), 860 this=self._match_text_seq("AS") and self._parse_select(), 861 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
863 class Generator(generator.Generator): 864 INTERVAL_ALLOWS_PLURAL_FORM = False 865 JOIN_HINTS = False 866 QUERY_HINTS = False 867 TABLE_HINTS = False 868 LIMIT_FETCH = "LIMIT" 869 RENAME_TABLE_WITH_DB = False 870 NVL2_SUPPORTED = False 871 UNNEST_WITH_ORDINALITY = False 872 COLLATE_IS_FUNC = True 873 LIMIT_ONLY_LITERALS = True 874 SUPPORTS_TABLE_ALIAS_COLUMNS = False 875 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 876 JSON_KEY_VALUE_PAIR_SEP = "," 877 NULL_ORDERING_SUPPORTED = False 878 IGNORE_NULLS_IN_FUNC = True 879 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 880 CAN_IMPLEMENT_ARRAY_ANY = True 881 SUPPORTS_TO_NUMBER = False 882 NAMED_PLACEHOLDER_TOKEN = "@" 883 HEX_FUNC = "TO_HEX" 884 WITH_PROPERTIES_PREFIX = "OPTIONS" 885 SUPPORTS_EXPLODING_PROJECTIONS = False 886 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 887 SUPPORTS_UNIX_SECONDS = True 888 889 TRANSFORMS = { 890 **generator.Generator.TRANSFORMS, 891 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 892 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 893 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 894 exp.Array: inline_array_unless_query, 895 exp.ArrayContains: _array_contains_sql, 896 exp.ArrayFilter: filter_array_using_unnest, 897 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 898 exp.CollateProperty: lambda self, e: ( 899 f"DEFAULT COLLATE {self.sql(e, 'this')}" 900 if e.args.get("default") 901 else f"COLLATE {self.sql(e, 'this')}" 902 ), 903 exp.Commit: lambda *_: "COMMIT TRANSACTION", 904 exp.CountIf: rename_func("COUNTIF"), 905 exp.Create: _create_sql, 906 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 907 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 908 exp.DateDiff: lambda self, e: self.func( 909 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 910 ), 911 exp.DateFromParts: rename_func("DATE"), 912 exp.DateStrToDate: datestrtodate_sql, 913 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 914 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 915 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 916 exp.DateTrunc: lambda self, e: self.func( 917 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 918 ), 919 exp.FromTimeZone: lambda self, e: self.func( 920 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 921 ), 922 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 923 exp.GroupConcat: lambda self, e: groupconcat_sql( 924 self, e, func_name="STRING_AGG", within_group=False 925 ), 926 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 927 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 928 exp.If: if_sql(false_value="NULL"), 929 exp.ILike: no_ilike_sql, 930 exp.IntDiv: rename_func("DIV"), 931 exp.Int64: rename_func("INT64"), 932 exp.JSONExtract: _json_extract_sql, 933 exp.JSONExtractArray: _json_extract_sql, 934 exp.JSONExtractScalar: _json_extract_sql, 935 exp.JSONFormat: rename_func("TO_JSON_STRING"), 936 exp.Levenshtein: _levenshtein_sql, 937 exp.Max: max_or_greatest, 938 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 939 exp.MD5Digest: rename_func("MD5"), 940 exp.Min: min_or_least, 941 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 942 exp.RegexpExtract: lambda self, e: self.func( 943 "REGEXP_EXTRACT", 944 e.this, 945 e.expression, 946 e.args.get("position"), 947 e.args.get("occurrence"), 948 ), 949 exp.RegexpExtractAll: lambda self, e: self.func( 950 "REGEXP_EXTRACT_ALL", e.this, e.expression 951 ), 952 exp.RegexpReplace: regexp_replace_sql, 953 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 954 exp.ReturnsProperty: _returnsproperty_sql, 955 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 956 exp.Select: transforms.preprocess( 957 [ 958 transforms.explode_projection_to_unnest(), 959 transforms.unqualify_unnest, 960 transforms.eliminate_distinct_on, 961 _alias_ordered_group, 962 transforms.eliminate_semi_and_anti_joins, 963 ] 964 ), 965 exp.SHA: rename_func("SHA1"), 966 exp.SHA2: sha256_sql, 967 exp.StabilityProperty: lambda self, e: ( 968 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 969 ), 970 exp.String: rename_func("STRING"), 971 exp.StrPosition: lambda self, e: ( 972 strposition_sql( 973 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 974 ) 975 ), 976 exp.StrToDate: _str_to_datetime_sql, 977 exp.StrToTime: _str_to_datetime_sql, 978 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 979 exp.TimeFromParts: rename_func("TIME"), 980 exp.TimestampFromParts: rename_func("DATETIME"), 981 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 982 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 983 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 984 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 985 exp.TimeStrToTime: timestrtotime_sql, 986 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 987 exp.TsOrDsAdd: _ts_or_ds_add_sql, 988 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 989 exp.TsOrDsToTime: rename_func("TIME"), 990 exp.TsOrDsToDatetime: rename_func("DATETIME"), 991 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 992 exp.Unhex: rename_func("FROM_HEX"), 993 exp.UnixDate: rename_func("UNIX_DATE"), 994 exp.UnixToTime: _unix_to_time_sql, 995 exp.Uuid: lambda *_: "GENERATE_UUID()", 996 exp.Values: _derived_table_values_to_unnest, 997 exp.VariancePop: rename_func("VAR_POP"), 998 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 999 } 1000 1001 SUPPORTED_JSON_PATH_PARTS = { 1002 exp.JSONPathKey, 1003 exp.JSONPathRoot, 1004 exp.JSONPathSubscript, 1005 } 1006 1007 TYPE_MAPPING = { 1008 **generator.Generator.TYPE_MAPPING, 1009 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1010 exp.DataType.Type.BIGINT: "INT64", 1011 exp.DataType.Type.BINARY: "BYTES", 1012 exp.DataType.Type.BLOB: "BYTES", 1013 exp.DataType.Type.BOOLEAN: "BOOL", 1014 exp.DataType.Type.CHAR: "STRING", 1015 exp.DataType.Type.DECIMAL: "NUMERIC", 1016 exp.DataType.Type.DOUBLE: "FLOAT64", 1017 exp.DataType.Type.FLOAT: "FLOAT64", 1018 exp.DataType.Type.INT: "INT64", 1019 exp.DataType.Type.NCHAR: "STRING", 1020 exp.DataType.Type.NVARCHAR: "STRING", 1021 exp.DataType.Type.SMALLINT: "INT64", 1022 exp.DataType.Type.TEXT: "STRING", 1023 exp.DataType.Type.TIMESTAMP: "DATETIME", 1024 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1025 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1026 exp.DataType.Type.TINYINT: "INT64", 1027 exp.DataType.Type.ROWVERSION: "BYTES", 1028 exp.DataType.Type.UUID: "STRING", 1029 exp.DataType.Type.VARBINARY: "BYTES", 1030 exp.DataType.Type.VARCHAR: "STRING", 1031 exp.DataType.Type.VARIANT: "ANY TYPE", 1032 } 1033 1034 PROPERTIES_LOCATION = { 1035 **generator.Generator.PROPERTIES_LOCATION, 1036 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1037 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1038 } 1039 1040 # WINDOW comes after QUALIFY 1041 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1042 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1043 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1044 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1045 } 1046 1047 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1048 RESERVED_KEYWORDS = { 1049 "all", 1050 "and", 1051 "any", 1052 "array", 1053 "as", 1054 "asc", 1055 "assert_rows_modified", 1056 "at", 1057 "between", 1058 "by", 1059 "case", 1060 "cast", 1061 "collate", 1062 "contains", 1063 "create", 1064 "cross", 1065 "cube", 1066 "current", 1067 "default", 1068 "define", 1069 "desc", 1070 "distinct", 1071 "else", 1072 "end", 1073 "enum", 1074 "escape", 1075 "except", 1076 "exclude", 1077 "exists", 1078 "extract", 1079 "false", 1080 "fetch", 1081 "following", 1082 "for", 1083 "from", 1084 "full", 1085 "group", 1086 "grouping", 1087 "groups", 1088 "hash", 1089 "having", 1090 "if", 1091 "ignore", 1092 "in", 1093 "inner", 1094 "intersect", 1095 "interval", 1096 "into", 1097 "is", 1098 "join", 1099 "lateral", 1100 "left", 1101 "like", 1102 "limit", 1103 "lookup", 1104 "merge", 1105 "natural", 1106 "new", 1107 "no", 1108 "not", 1109 "null", 1110 "nulls", 1111 "of", 1112 "on", 1113 "or", 1114 "order", 1115 "outer", 1116 "over", 1117 "partition", 1118 "preceding", 1119 "proto", 1120 "qualify", 1121 "range", 1122 "recursive", 1123 "respect", 1124 "right", 1125 "rollup", 1126 "rows", 1127 "select", 1128 "set", 1129 "some", 1130 "struct", 1131 "tablesample", 1132 "then", 1133 "to", 1134 "treat", 1135 "true", 1136 "unbounded", 1137 "union", 1138 "unnest", 1139 "using", 1140 "when", 1141 "where", 1142 "window", 1143 "with", 1144 "within", 1145 } 1146 1147 def mod_sql(self, expression: exp.Mod) -> str: 1148 this = expression.this 1149 expr = expression.expression 1150 return self.func( 1151 "MOD", 1152 this.unnest() if isinstance(this, exp.Paren) else this, 1153 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1154 ) 1155 1156 def column_parts(self, expression: exp.Column) -> str: 1157 if expression.meta.get("quoted_column"): 1158 # If a column reference is of the form `dataset.table`.name, we need 1159 # to preserve the quoted table path, otherwise the reference breaks 1160 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1161 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1162 return f"{table_path}.{self.sql(expression, 'this')}" 1163 1164 return super().column_parts(expression) 1165 1166 def table_parts(self, expression: exp.Table) -> str: 1167 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1168 # we need to make sure the correct quoting is used in each case. 1169 # 1170 # For example, if there is a CTE x that clashes with a schema name, then the former will 1171 # return the table y in that schema, whereas the latter will return the CTE's y column: 1172 # 1173 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1174 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1175 if expression.meta.get("quoted_table"): 1176 table_parts = ".".join(p.name for p in expression.parts) 1177 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1178 1179 return super().table_parts(expression) 1180 1181 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1182 this = expression.this 1183 if isinstance(this, exp.TsOrDsToDatetime): 1184 func_name = "FORMAT_DATETIME" 1185 elif isinstance(this, exp.TsOrDsToTimestamp): 1186 func_name = "FORMAT_TIMESTAMP" 1187 else: 1188 func_name = "FORMAT_DATE" 1189 1190 time_expr = ( 1191 this 1192 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1193 else expression 1194 ) 1195 return self.func( 1196 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1197 ) 1198 1199 def eq_sql(self, expression: exp.EQ) -> str: 1200 # Operands of = cannot be NULL in BigQuery 1201 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1202 if not isinstance(expression.parent, exp.Update): 1203 return "NULL" 1204 1205 return self.binary(expression, "=") 1206 1207 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1208 parent = expression.parent 1209 1210 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1211 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1212 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1213 return self.func( 1214 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1215 ) 1216 1217 return super().attimezone_sql(expression) 1218 1219 def trycast_sql(self, expression: exp.TryCast) -> str: 1220 return self.cast_sql(expression, safe_prefix="SAFE_") 1221 1222 def bracket_sql(self, expression: exp.Bracket) -> str: 1223 this = expression.this 1224 expressions = expression.expressions 1225 1226 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1227 arg = expressions[0] 1228 if arg.type is None: 1229 from sqlglot.optimizer.annotate_types import annotate_types 1230 1231 arg = annotate_types(arg) 1232 1233 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1234 # BQ doesn't support bracket syntax with string values for structs 1235 return f"{self.sql(this)}.{arg.name}" 1236 1237 expressions_sql = self.expressions(expression, flat=True) 1238 offset = expression.args.get("offset") 1239 1240 if offset == 0: 1241 expressions_sql = f"OFFSET({expressions_sql})" 1242 elif offset == 1: 1243 expressions_sql = f"ORDINAL({expressions_sql})" 1244 elif offset is not None: 1245 self.unsupported(f"Unsupported array offset: {offset}") 1246 1247 if expression.args.get("safe"): 1248 expressions_sql = f"SAFE_{expressions_sql}" 1249 1250 return f"{self.sql(this)}[{expressions_sql}]" 1251 1252 def in_unnest_op(self, expression: exp.Unnest) -> str: 1253 return self.sql(expression) 1254 1255 def version_sql(self, expression: exp.Version) -> str: 1256 if expression.name == "TIMESTAMP": 1257 expression.set("this", "SYSTEM_TIME") 1258 return super().version_sql(expression) 1259 1260 def contains_sql(self, expression: exp.Contains) -> str: 1261 this = expression.this 1262 expr = expression.expression 1263 1264 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1265 this = this.this 1266 expr = expr.this 1267 1268 return self.func("CONTAINS_SUBSTR", this, expr) 1269 1270 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1271 this = expression.this 1272 1273 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1274 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1275 # because they aren't literals and so the above syntax is invalid BigQuery. 1276 if isinstance(this, exp.Array): 1277 elem = seq_get(this.expressions, 0) 1278 if not (elem and elem.find(exp.Query)): 1279 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1280 1281 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1156 def column_parts(self, expression: exp.Column) -> str: 1157 if expression.meta.get("quoted_column"): 1158 # If a column reference is of the form `dataset.table`.name, we need 1159 # to preserve the quoted table path, otherwise the reference breaks 1160 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1161 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1162 return f"{table_path}.{self.sql(expression, 'this')}" 1163 1164 return super().column_parts(expression)
1166 def table_parts(self, expression: exp.Table) -> str: 1167 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1168 # we need to make sure the correct quoting is used in each case. 1169 # 1170 # For example, if there is a CTE x that clashes with a schema name, then the former will 1171 # return the table y in that schema, whereas the latter will return the CTE's y column: 1172 # 1173 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1174 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1175 if expression.meta.get("quoted_table"): 1176 table_parts = ".".join(p.name for p in expression.parts) 1177 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1178 1179 return super().table_parts(expression)
1181 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1182 this = expression.this 1183 if isinstance(this, exp.TsOrDsToDatetime): 1184 func_name = "FORMAT_DATETIME" 1185 elif isinstance(this, exp.TsOrDsToTimestamp): 1186 func_name = "FORMAT_TIMESTAMP" 1187 else: 1188 func_name = "FORMAT_DATE" 1189 1190 time_expr = ( 1191 this 1192 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1193 else expression 1194 ) 1195 return self.func( 1196 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1197 )
1199 def eq_sql(self, expression: exp.EQ) -> str: 1200 # Operands of = cannot be NULL in BigQuery 1201 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1202 if not isinstance(expression.parent, exp.Update): 1203 return "NULL" 1204 1205 return self.binary(expression, "=")
1207 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1208 parent = expression.parent 1209 1210 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1211 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1212 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1213 return self.func( 1214 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1215 ) 1216 1217 return super().attimezone_sql(expression)
1222 def bracket_sql(self, expression: exp.Bracket) -> str: 1223 this = expression.this 1224 expressions = expression.expressions 1225 1226 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1227 arg = expressions[0] 1228 if arg.type is None: 1229 from sqlglot.optimizer.annotate_types import annotate_types 1230 1231 arg = annotate_types(arg) 1232 1233 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1234 # BQ doesn't support bracket syntax with string values for structs 1235 return f"{self.sql(this)}.{arg.name}" 1236 1237 expressions_sql = self.expressions(expression, flat=True) 1238 offset = expression.args.get("offset") 1239 1240 if offset == 0: 1241 expressions_sql = f"OFFSET({expressions_sql})" 1242 elif offset == 1: 1243 expressions_sql = f"ORDINAL({expressions_sql})" 1244 elif offset is not None: 1245 self.unsupported(f"Unsupported array offset: {offset}") 1246 1247 if expression.args.get("safe"): 1248 expressions_sql = f"SAFE_{expressions_sql}" 1249 1250 return f"{self.sql(this)}[{expressions_sql}]"
1270 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1271 this = expression.this 1272 1273 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1274 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1275 # because they aren't literals and so the above syntax is invalid BigQuery. 1276 if isinstance(this, exp.Array): 1277 elem = seq_get(this.expressions, 0) 1278 if not (elem and elem.find(exp.Query)): 1279 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1280 1281 return super().cast_sql(expression, safe_prefix=safe_prefix)
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql