sqlglot.dialects.bigquery
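A minimal usage sketch: the dialect defined in this module is registered under the name "bigquery" and is normally exercised through sqlglot's top-level helpers rather than instantiated directly; the query below is an illustrative placeholder.

import sqlglot

# TO_HEX(MD5(..)) is parsed into exp.MD5 (see _build_to_hex below), so other
# dialects can render it with their own MD5 semantics.
sql = "SELECT TO_HEX(MD5('abc')) AS fingerprint"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])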
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 NormalizationStrategy, 11 annotate_with_type_lambda, 12 arg_max_or_min_no_count, 13 binary_from_function, 14 date_add_interval_sql, 15 datestrtodate_sql, 16 build_formatted_time, 17 filter_array_using_unnest, 18 if_sql, 19 inline_array_unless_query, 20 max_or_greatest, 21 min_or_least, 22 no_ilike_sql, 23 build_date_delta_with_interval, 24 regexp_replace_sql, 25 rename_func, 26 sha256_sql, 27 timestrtotime_sql, 28 ts_or_ds_add_cast, 29 unit_to_var, 30 strposition_sql, 31 groupconcat_sql, 32) 33from sqlglot.helper import seq_get, split_num_words 34from sqlglot.tokens import TokenType 35from sqlglot.generator import unsupported_args 36 37if t.TYPE_CHECKING: 38 from sqlglot._typing import E, Lit 39 40 from sqlglot.optimizer.annotate_types import TypeAnnotator 41 42logger = logging.getLogger("sqlglot") 43 44 45JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 46 47DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 48 49 50def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 51 if not expression.find_ancestor(exp.From, exp.Join): 52 return self.values_sql(expression) 53 54 structs = [] 55 alias = expression.args.get("alias") 56 for tup in expression.find_all(exp.Tuple): 57 field_aliases = ( 58 alias.columns 59 if alias and alias.columns 60 else (f"_c{i}" for i in range(len(tup.expressions))) 61 ) 62 expressions = [ 63 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 64 for name, fld in zip(field_aliases, tup.expressions) 65 ] 66 structs.append(exp.Struct(expressions=expressions)) 67 68 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 69 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 70 return self.unnest_sql( 71 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 72 ) 73 74 75def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 76 this = expression.this 77 if isinstance(this, exp.Schema): 78 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 79 else: 80 this = self.sql(this) 81 return f"RETURNS {this}" 82 83 84def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 85 returns = expression.find(exp.ReturnsProperty) 86 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 87 expression.set("kind", "TABLE FUNCTION") 88 89 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 90 expression.set("expression", expression.expression.this) 91 92 return self.create_sql(expression) 93 94 95# https://issuetracker.google.com/issues/162294746 96# workaround for bigquery bug when grouping by an expression and then ordering 97# WITH x AS (SELECT 1 y) 98# SELECT y + 1 z 99# FROM x 100# GROUP BY x + 1 101# ORDER by z 102def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 103 if isinstance(expression, exp.Select): 104 group = expression.args.get("group") 105 order = expression.args.get("order") 106 107 if group and order: 108 aliases = { 109 select.this: select.args["alias"] 110 for select in expression.selects 111 if isinstance(select, exp.Alias) 112 } 113 114 for grouped in group.expressions: 115 if grouped.is_int: 116 continue 117 alias = 
aliases.get(grouped) 118 if alias: 119 grouped.replace(exp.column(alias)) 120 121 return expression 122 123 124def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 125 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 126 if isinstance(expression, exp.CTE) and expression.alias_column_names: 127 cte_query = expression.this 128 129 if cte_query.is_star: 130 logger.warning( 131 "Can't push down CTE column names for star queries. Run the query through" 132 " the optimizer or use 'qualify' to expand the star projections first." 133 ) 134 return expression 135 136 column_names = expression.alias_column_names 137 expression.args["alias"].set("columns", None) 138 139 for name, select in zip(column_names, cte_query.selects): 140 to_replace = select 141 142 if isinstance(select, exp.Alias): 143 select = select.this 144 145 # Inner aliases are shadowed by the CTE column names 146 to_replace.replace(exp.alias_(select, name)) 147 148 return expression 149 150 151def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 152 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 153 this.set("zone", seq_get(args, 2)) 154 return this 155 156 157def _build_timestamp(args: t.List) -> exp.Timestamp: 158 timestamp = exp.Timestamp.from_arg_list(args) 159 timestamp.set("with_tz", True) 160 return timestamp 161 162 163def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 164 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 165 return expr_type.from_arg_list(args) 166 167 168def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 169 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 170 arg = seq_get(args, 0) 171 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 172 173 174def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 175 return self.sql( 176 exp.Exists( 177 this=exp.select("1") 178 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 179 .where(exp.column("_col").eq(expression.right)) 180 ) 181 ) 182 183 184def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 185 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 186 187 188def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 189 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 190 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 191 unit = unit_to_var(expression) 192 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 193 194 195def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 196 scale = expression.args.get("scale") 197 timestamp = expression.this 198 199 if scale in (None, exp.UnixToTime.SECONDS): 200 return self.func("TIMESTAMP_SECONDS", timestamp) 201 if scale == exp.UnixToTime.MILLIS: 202 return self.func("TIMESTAMP_MILLIS", timestamp) 203 if scale == exp.UnixToTime.MICROS: 204 return self.func("TIMESTAMP_MICROS", timestamp) 205 206 unix_seconds = exp.cast( 207 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 208 ) 209 return self.func("TIMESTAMP_SECONDS", unix_seconds) 210 211 212def _build_time(args: t.List) -> exp.Func: 213 if len(args) == 1: 214 return exp.TsOrDsToTime(this=args[0]) 215 if len(args) == 2: 216 
return exp.Time.from_arg_list(args) 217 return exp.TimeFromParts.from_arg_list(args) 218 219 220def _build_datetime(args: t.List) -> exp.Func: 221 if len(args) == 1: 222 return exp.TsOrDsToDatetime.from_arg_list(args) 223 if len(args) == 2: 224 return exp.Datetime.from_arg_list(args) 225 return exp.TimestampFromParts.from_arg_list(args) 226 227 228def _build_regexp_extract( 229 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 230) -> t.Callable[[t.List], E]: 231 def _builder(args: t.List) -> E: 232 try: 233 group = re.compile(args[1].name).groups == 1 234 except re.error: 235 group = False 236 237 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 238 return expr_type( 239 this=seq_get(args, 0), 240 expression=seq_get(args, 1), 241 position=seq_get(args, 2), 242 occurrence=seq_get(args, 3), 243 group=exp.Literal.number(1) if group else default_group, 244 ) 245 246 return _builder 247 248 249def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 250 def _builder(args: t.List, dialect: Dialect) -> E: 251 if len(args) == 1: 252 # The default value for the JSONPath is '$' i.e all of the data 253 args.append(exp.Literal.string("$")) 254 return parser.build_extract_json_with_path(expr_type)(args, dialect) 255 256 return _builder 257 258 259def _str_to_datetime_sql( 260 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 261) -> str: 262 this = self.sql(expression, "this") 263 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 264 265 if expression.args.get("safe"): 266 fmt = self.format_time( 267 expression, 268 self.dialect.INVERSE_FORMAT_MAPPING, 269 self.dialect.INVERSE_FORMAT_TRIE, 270 ) 271 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 272 273 fmt = self.format_time(expression) 274 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 275 276 277def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 278 """ 279 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 280 +---------+---------+---------+------------+---------+ 281 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 282 +---------+---------+---------+------------+---------+ 283 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 284 +---------+---------+---------+------------+---------+ 285 """ 286 self._annotate_args(expression) 287 288 this: exp.Expression = expression.this 289 290 self._set_type( 291 expression, 292 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 293 ) 294 return expression 295 296 297@unsupported_args("ins_cost", "del_cost", "sub_cost") 298def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 299 max_dist = expression.args.get("max_dist") 300 if max_dist: 301 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 302 303 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 304 305 306def _build_levenshtein(args: t.List) -> exp.Levenshtein: 307 max_dist = seq_get(args, 2) 308 return exp.Levenshtein( 309 this=seq_get(args, 0), 310 expression=seq_get(args, 1), 311 max_dist=max_dist.expression if max_dist else None, 312 ) 313 314 315def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 316 def _builder(args: t.List) -> exp.TimeToStr: 317 return exp.TimeToStr( 318 this=expr_type(this=seq_get(args, 1)), 319 format=seq_get(args, 0), 320 zone=seq_get(args, 2), 
321 ) 322 323 return _builder 324 325 326def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous: 327 if len(args) == 3: 328 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 329 330 # Lowercase the operands in case of transpilation, as exp.Contains 331 # is case-sensitive on other dialects 332 this = exp.Lower(this=seq_get(args, 0)) 333 expr = exp.Lower(this=seq_get(args, 1)) 334 335 return exp.Contains(this=this, expression=expr) 336 337 338def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 339 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 340 upper = name.upper() 341 342 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 343 344 if dquote_escaping: 345 self._quote_json_path_key_using_brackets = False 346 347 sql = rename_func(upper)(self, expression) 348 349 if dquote_escaping: 350 self._quote_json_path_key_using_brackets = True 351 352 return sql 353 354 355class BigQuery(Dialect): 356 WEEK_OFFSET = -1 357 UNNEST_COLUMN_ONLY = True 358 SUPPORTS_USER_DEFINED_TYPES = False 359 SUPPORTS_SEMI_ANTI_JOIN = False 360 LOG_BASE_FIRST = False 361 HEX_LOWERCASE = True 362 FORCE_EARLY_ALIAS_REF_EXPANSION = True 363 PRESERVE_ORIGINAL_NAMES = True 364 HEX_STRING_IS_INTEGER_TYPE = True 365 366 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 367 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 368 369 # bigquery udfs are case sensitive 370 NORMALIZE_FUNCTIONS = False 371 372 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 373 TIME_MAPPING = { 374 "%D": "%m/%d/%y", 375 "%E6S": "%S.%f", 376 "%e": "%-d", 377 } 378 379 FORMAT_MAPPING = { 380 "DD": "%d", 381 "MM": "%m", 382 "MON": "%b", 383 "MONTH": "%B", 384 "YYYY": "%Y", 385 "YY": "%y", 386 "HH": "%I", 387 "HH12": "%I", 388 "HH24": "%H", 389 "MI": "%M", 390 "SS": "%S", 391 "SSSSS": "%f", 392 "TZH": "%z", 393 } 394 395 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 396 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 397 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 398 399 # All set operations require either a DISTINCT or ALL specifier 400 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 401 402 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 403 TYPE_TO_EXPRESSIONS = { 404 **Dialect.TYPE_TO_EXPRESSIONS, 405 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 406 } 407 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 408 409 ANNOTATORS = { 410 **Dialect.ANNOTATORS, 411 **{ 412 expr_type: annotate_with_type_lambda(data_type) 413 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 414 for expr_type in expressions 415 }, 416 **{ 417 expr_type: lambda self, e: _annotate_math_functions(self, e) 418 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 419 }, 420 **{ 421 expr_type: lambda self, e: self._annotate_by_args(e, "this") 422 for expr_type in ( 423 exp.Left, 424 exp.Right, 425 exp.Lower, 426 exp.Upper, 427 exp.Pad, 428 exp.Trim, 429 exp.RegexpExtract, 430 exp.RegexpReplace, 431 exp.Repeat, 432 exp.Substring, 433 ) 434 }, 435 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 436 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 437 
exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 438 } 439 440 def normalize_identifier(self, expression: E) -> E: 441 if ( 442 isinstance(expression, exp.Identifier) 443 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 444 ): 445 parent = expression.parent 446 while isinstance(parent, exp.Dot): 447 parent = parent.parent 448 449 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 450 # by default. The following check uses a heuristic to detect tables based on whether 451 # they are qualified. This should generally be correct, because tables in BigQuery 452 # must be qualified with at least a dataset, unless @@dataset_id is set. 453 case_sensitive = ( 454 isinstance(parent, exp.UserDefinedFunction) 455 or ( 456 isinstance(parent, exp.Table) 457 and parent.db 458 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 459 ) 460 or expression.meta.get("is_table") 461 ) 462 if not case_sensitive: 463 expression.set("this", expression.this.lower()) 464 465 return expression 466 467 class Tokenizer(tokens.Tokenizer): 468 QUOTES = ["'", '"', '"""', "'''"] 469 COMMENTS = ["--", "#", ("/*", "*/")] 470 IDENTIFIERS = ["`"] 471 STRING_ESCAPES = ["\\"] 472 473 HEX_STRINGS = [("0x", ""), ("0X", "")] 474 475 BYTE_STRINGS = [ 476 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 477 ] 478 479 RAW_STRINGS = [ 480 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 481 ] 482 483 KEYWORDS = { 484 **tokens.Tokenizer.KEYWORDS, 485 "ANY TYPE": TokenType.VARIANT, 486 "BEGIN": TokenType.COMMAND, 487 "BEGIN TRANSACTION": TokenType.BEGIN, 488 "BYTEINT": TokenType.INT, 489 "BYTES": TokenType.BINARY, 490 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 491 "DATETIME": TokenType.TIMESTAMP, 492 "DECLARE": TokenType.COMMAND, 493 "ELSEIF": TokenType.COMMAND, 494 "EXCEPTION": TokenType.COMMAND, 495 "EXPORT": TokenType.EXPORT, 496 "FLOAT64": TokenType.DOUBLE, 497 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 498 "MODEL": TokenType.MODEL, 499 "NOT DETERMINISTIC": TokenType.VOLATILE, 500 "RECORD": TokenType.STRUCT, 501 "TIMESTAMP": TokenType.TIMESTAMPTZ, 502 } 503 KEYWORDS.pop("DIV") 504 KEYWORDS.pop("VALUES") 505 KEYWORDS.pop("/*+") 506 507 class Parser(parser.Parser): 508 PREFIXED_PIVOT_COLUMNS = True 509 LOG_DEFAULTS_TO_LN = True 510 SUPPORTS_IMPLICIT_UNNEST = True 511 512 FUNCTIONS = { 513 **parser.Parser.FUNCTIONS, 514 "CONTAINS_SUBSTR": _build_contains_substring, 515 "DATE": _build_date, 516 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 517 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 518 "DATE_TRUNC": lambda args: exp.DateTrunc( 519 unit=exp.Literal.string(str(seq_get(args, 1))), 520 this=seq_get(args, 0), 521 zone=seq_get(args, 2), 522 ), 523 "DATETIME": _build_datetime, 524 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 525 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 526 "DIV": binary_from_function(exp.IntDiv), 527 "EDIT_DISTANCE": _build_levenshtein, 528 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 529 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 530 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 531 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 532 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 533 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 534 
"JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 535 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 536 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 537 "MD5": exp.MD5Digest.from_arg_list, 538 "TO_HEX": _build_to_hex, 539 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 540 [seq_get(args, 1), seq_get(args, 0)] 541 ), 542 "PARSE_TIMESTAMP": _build_parse_timestamp, 543 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 544 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 545 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 546 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 547 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 548 ), 549 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 550 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 551 "SPLIT": lambda args: exp.Split( 552 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 553 this=seq_get(args, 0), 554 expression=seq_get(args, 1) or exp.Literal.string(","), 555 ), 556 "STRPOS": exp.StrPosition.from_arg_list, 557 "TIME": _build_time, 558 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 559 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 560 "TIMESTAMP": _build_timestamp, 561 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 562 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 563 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 564 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 565 ), 566 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 567 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 568 ), 569 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 570 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 571 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 572 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 573 } 574 575 FUNCTION_PARSERS = { 576 **parser.Parser.FUNCTION_PARSERS, 577 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 578 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 579 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 580 } 581 FUNCTION_PARSERS.pop("TRIM") 582 583 NO_PAREN_FUNCTIONS = { 584 **parser.Parser.NO_PAREN_FUNCTIONS, 585 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 586 } 587 588 NESTED_TYPE_TOKENS = { 589 *parser.Parser.NESTED_TYPE_TOKENS, 590 TokenType.TABLE, 591 } 592 593 PROPERTY_PARSERS = { 594 **parser.Parser.PROPERTY_PARSERS, 595 "NOT DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 597 ), 598 "OPTIONS": lambda self: self._parse_with_property(), 599 } 600 601 CONSTRAINT_PARSERS = { 602 **parser.Parser.CONSTRAINT_PARSERS, 603 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 604 } 605 606 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 607 RANGE_PARSERS.pop(TokenType.OVERLAPS) 608 609 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 610 611 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 612 613 STATEMENT_PARSERS = { 614 **parser.Parser.STATEMENT_PARSERS, 615 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 616 TokenType.END: lambda self: self._parse_as_command(self._prev), 617 TokenType.FOR: lambda self: self._parse_for_in(), 618 
TokenType.EXPORT: lambda self: self._parse_export_data(), 619 } 620 621 BRACKET_OFFSETS = { 622 "OFFSET": (0, False), 623 "ORDINAL": (1, False), 624 "SAFE_OFFSET": (0, True), 625 "SAFE_ORDINAL": (1, True), 626 } 627 628 def _parse_for_in(self) -> exp.ForIn: 629 this = self._parse_range() 630 self._match_text_seq("DO") 631 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 632 633 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 634 this = super()._parse_table_part(schema=schema) or self._parse_number() 635 636 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 637 if isinstance(this, exp.Identifier): 638 table_name = this.name 639 while self._match(TokenType.DASH, advance=False) and self._next: 640 start = self._curr 641 while self._is_connected() and not self._match_set( 642 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 643 ): 644 self._advance() 645 646 table_name += self._find_sql(start, self._prev) 647 648 this = exp.Identifier( 649 this=table_name, quoted=this.args.get("quoted") 650 ).update_positions(this) 651 elif isinstance(this, exp.Literal): 652 table_name = this.name 653 654 if self._is_connected() and self._parse_var(any_token=True): 655 table_name += self._prev.text 656 657 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 658 659 return this 660 661 def _parse_table_parts( 662 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 663 ) -> exp.Table: 664 table = super()._parse_table_parts( 665 schema=schema, is_db_reference=is_db_reference, wildcard=True 666 ) 667 668 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 669 if not table.catalog: 670 if table.db: 671 previous_db = table.args["db"] 672 parts = table.db.split(".") 673 if len(parts) == 2 and not table.args["db"].quoted: 674 table.set( 675 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 676 ) 677 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 678 else: 679 previous_this = table.this 680 parts = table.name.split(".") 681 if len(parts) == 2 and not table.this.quoted: 682 table.set( 683 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 684 ) 685 table.set( 686 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 687 ) 688 689 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 690 alias = table.this 691 catalog, db, this, *rest = ( 692 exp.to_identifier(p, quoted=True) 693 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 694 ) 695 696 for part in (catalog, db, this): 697 if part: 698 part.update_positions(table.this) 699 700 if rest and this: 701 this = exp.Dot.build([this, *rest]) # type: ignore 702 703 table = exp.Table( 704 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 705 ) 706 table.meta["quoted_table"] = True 707 else: 708 alias = None 709 710 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 711 # dataset, so if the project identifier is omitted we need to fix the ast so that 712 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 713 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 714 # views, because it would seem like the "catalog" part is set, when it'd actually 715 # be the region/dataset. 
Merging the two identifiers into a single one is done to 716 # avoid producing a 4-part Table reference, which would cause issues in the schema 717 # module, when there are 3-part table names mixed with information schema views. 718 # 719 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 720 table_parts = table.parts 721 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 722 # We need to alias the table here to avoid breaking existing qualified columns. 723 # This is expected to be safe, because if there's an actual alias coming up in 724 # the token stream, it will overwrite this one. If there isn't one, we are only 725 # exposing the name that can be used to reference the view explicitly (a no-op). 726 exp.alias_( 727 table, 728 t.cast(exp.Identifier, alias or table_parts[-1]), 729 table=True, 730 copy=False, 731 ) 732 733 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 734 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 735 line=table_parts[-2].meta.get("line"), 736 col=table_parts[-1].meta.get("col"), 737 start=table_parts[-2].meta.get("start"), 738 end=table_parts[-1].meta.get("end"), 739 ) 740 table.set("this", new_this) 741 table.set("db", seq_get(table_parts, -3)) 742 table.set("catalog", seq_get(table_parts, -4)) 743 744 return table 745 746 def _parse_column(self) -> t.Optional[exp.Expression]: 747 column = super()._parse_column() 748 if isinstance(column, exp.Column): 749 parts = column.parts 750 if any("." in p.name for p in parts): 751 catalog, db, table, this, *rest = ( 752 exp.to_identifier(p, quoted=True) 753 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 754 ) 755 756 if rest and this: 757 this = exp.Dot.build([this, *rest]) # type: ignore 758 759 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 760 column.meta["quoted_column"] = True 761 762 return column 763 764 @t.overload 765 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 766 767 @t.overload 768 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
769 770 def _parse_json_object(self, agg=False): 771 json_object = super()._parse_json_object() 772 array_kv_pair = seq_get(json_object.expressions, 0) 773 774 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 775 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 776 if ( 777 array_kv_pair 778 and isinstance(array_kv_pair.this, exp.Array) 779 and isinstance(array_kv_pair.expression, exp.Array) 780 ): 781 keys = array_kv_pair.this.expressions 782 values = array_kv_pair.expression.expressions 783 784 json_object.set( 785 "expressions", 786 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 787 ) 788 789 return json_object 790 791 def _parse_bracket( 792 self, this: t.Optional[exp.Expression] = None 793 ) -> t.Optional[exp.Expression]: 794 bracket = super()._parse_bracket(this) 795 796 if this is bracket: 797 return bracket 798 799 if isinstance(bracket, exp.Bracket): 800 for expression in bracket.expressions: 801 name = expression.name.upper() 802 803 if name not in self.BRACKET_OFFSETS: 804 break 805 806 offset, safe = self.BRACKET_OFFSETS[name] 807 bracket.set("offset", offset) 808 bracket.set("safe", safe) 809 expression.replace(expression.expressions[0]) 810 811 return bracket 812 813 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 814 unnest = super()._parse_unnest(with_alias=with_alias) 815 816 if not unnest: 817 return None 818 819 unnest_expr = seq_get(unnest.expressions, 0) 820 if unnest_expr: 821 from sqlglot.optimizer.annotate_types import annotate_types 822 823 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 824 825 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 826 # in contrast to other dialects such as DuckDB which flattens only the array by default 827 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 828 array_elem.is_type(exp.DataType.Type.STRUCT) 829 for array_elem in unnest_expr._type.expressions 830 ): 831 unnest.set("explode_array", True) 832 833 return unnest 834 835 def _parse_make_interval(self) -> exp.MakeInterval: 836 expr = exp.MakeInterval() 837 838 for arg_key in expr.arg_types: 839 value = self._parse_lambda() 840 841 if not value: 842 break 843 844 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 845 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 846 if isinstance(value, exp.Kwarg): 847 arg_key = value.this.name 848 849 expr.set(arg_key, value) 850 851 self._match(TokenType.COMMA) 852 853 return expr 854 855 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 856 expr = self.expression( 857 exp.FeaturesAtTime, 858 this=(self._match(TokenType.TABLE) and self._parse_table()) 859 or self._parse_select(nested=True), 860 ) 861 862 while self._match(TokenType.COMMA): 863 arg = self._parse_lambda() 864 865 # Get the LHS of the Kwarg and set the arg to that value, e.g 866 # "num_rows => 1" sets the expr's `num_rows` arg 867 if arg: 868 expr.set(arg.this.name, arg) 869 870 return expr 871 872 def _parse_export_data(self) -> exp.Export: 873 self._match_text_seq("DATA") 874 875 return self.expression( 876 exp.Export, 877 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 878 options=self._parse_properties(), 879 this=self._match_text_seq("AS") and self._parse_select(), 880 ) 881 882 class Generator(generator.Generator): 883 INTERVAL_ALLOWS_PLURAL_FORM = False 884 
JOIN_HINTS = False 885 QUERY_HINTS = False 886 TABLE_HINTS = False 887 LIMIT_FETCH = "LIMIT" 888 RENAME_TABLE_WITH_DB = False 889 NVL2_SUPPORTED = False 890 UNNEST_WITH_ORDINALITY = False 891 COLLATE_IS_FUNC = True 892 LIMIT_ONLY_LITERALS = True 893 SUPPORTS_TABLE_ALIAS_COLUMNS = False 894 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 895 JSON_KEY_VALUE_PAIR_SEP = "," 896 NULL_ORDERING_SUPPORTED = False 897 IGNORE_NULLS_IN_FUNC = True 898 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 899 CAN_IMPLEMENT_ARRAY_ANY = True 900 SUPPORTS_TO_NUMBER = False 901 NAMED_PLACEHOLDER_TOKEN = "@" 902 HEX_FUNC = "TO_HEX" 903 WITH_PROPERTIES_PREFIX = "OPTIONS" 904 SUPPORTS_EXPLODING_PROJECTIONS = False 905 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 906 SUPPORTS_UNIX_SECONDS = True 907 908 TRANSFORMS = { 909 **generator.Generator.TRANSFORMS, 910 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 911 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 912 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 913 exp.Array: inline_array_unless_query, 914 exp.ArrayContains: _array_contains_sql, 915 exp.ArrayFilter: filter_array_using_unnest, 916 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 917 exp.CollateProperty: lambda self, e: ( 918 f"DEFAULT COLLATE {self.sql(e, 'this')}" 919 if e.args.get("default") 920 else f"COLLATE {self.sql(e, 'this')}" 921 ), 922 exp.Commit: lambda *_: "COMMIT TRANSACTION", 923 exp.CountIf: rename_func("COUNTIF"), 924 exp.Create: _create_sql, 925 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 926 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 927 exp.DateDiff: lambda self, e: self.func( 928 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 929 ), 930 exp.DateFromParts: rename_func("DATE"), 931 exp.DateStrToDate: datestrtodate_sql, 932 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 933 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 934 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 935 exp.DateTrunc: lambda self, e: self.func( 936 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 937 ), 938 exp.FromTimeZone: lambda self, e: self.func( 939 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 940 ), 941 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 942 exp.GroupConcat: lambda self, e: groupconcat_sql( 943 self, e, func_name="STRING_AGG", within_group=False 944 ), 945 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 946 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 947 exp.If: if_sql(false_value="NULL"), 948 exp.ILike: no_ilike_sql, 949 exp.IntDiv: rename_func("DIV"), 950 exp.Int64: rename_func("INT64"), 951 exp.JSONExtract: _json_extract_sql, 952 exp.JSONExtractArray: _json_extract_sql, 953 exp.JSONExtractScalar: _json_extract_sql, 954 exp.JSONFormat: rename_func("TO_JSON_STRING"), 955 exp.Levenshtein: _levenshtein_sql, 956 exp.Max: max_or_greatest, 957 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 958 exp.MD5Digest: rename_func("MD5"), 959 exp.Min: min_or_least, 960 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 961 exp.RegexpExtract: lambda self, e: self.func( 962 "REGEXP_EXTRACT", 963 e.this, 964 e.expression, 965 e.args.get("position"), 966 e.args.get("occurrence"), 967 ), 968 exp.RegexpExtractAll: lambda self, e: self.func( 969 "REGEXP_EXTRACT_ALL", e.this, e.expression 970 ), 971 exp.RegexpReplace: regexp_replace_sql, 972 exp.RegexpLike: 
rename_func("REGEXP_CONTAINS"), 973 exp.ReturnsProperty: _returnsproperty_sql, 974 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 975 exp.Select: transforms.preprocess( 976 [ 977 transforms.explode_projection_to_unnest(), 978 transforms.unqualify_unnest, 979 transforms.eliminate_distinct_on, 980 _alias_ordered_group, 981 transforms.eliminate_semi_and_anti_joins, 982 ] 983 ), 984 exp.SHA: rename_func("SHA1"), 985 exp.SHA2: sha256_sql, 986 exp.StabilityProperty: lambda self, e: ( 987 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 988 ), 989 exp.String: rename_func("STRING"), 990 exp.StrPosition: lambda self, e: ( 991 strposition_sql( 992 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 993 ) 994 ), 995 exp.StrToDate: _str_to_datetime_sql, 996 exp.StrToTime: _str_to_datetime_sql, 997 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 998 exp.TimeFromParts: rename_func("TIME"), 999 exp.TimestampFromParts: rename_func("DATETIME"), 1000 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1001 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1002 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1003 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1004 exp.TimeStrToTime: timestrtotime_sql, 1005 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1006 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1007 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1008 exp.TsOrDsToTime: rename_func("TIME"), 1009 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1010 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1011 exp.Unhex: rename_func("FROM_HEX"), 1012 exp.UnixDate: rename_func("UNIX_DATE"), 1013 exp.UnixToTime: _unix_to_time_sql, 1014 exp.Uuid: lambda *_: "GENERATE_UUID()", 1015 exp.Values: _derived_table_values_to_unnest, 1016 exp.VariancePop: rename_func("VAR_POP"), 1017 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1018 } 1019 1020 SUPPORTED_JSON_PATH_PARTS = { 1021 exp.JSONPathKey, 1022 exp.JSONPathRoot, 1023 exp.JSONPathSubscript, 1024 } 1025 1026 TYPE_MAPPING = { 1027 **generator.Generator.TYPE_MAPPING, 1028 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1029 exp.DataType.Type.BIGINT: "INT64", 1030 exp.DataType.Type.BINARY: "BYTES", 1031 exp.DataType.Type.BLOB: "BYTES", 1032 exp.DataType.Type.BOOLEAN: "BOOL", 1033 exp.DataType.Type.CHAR: "STRING", 1034 exp.DataType.Type.DECIMAL: "NUMERIC", 1035 exp.DataType.Type.DOUBLE: "FLOAT64", 1036 exp.DataType.Type.FLOAT: "FLOAT64", 1037 exp.DataType.Type.INT: "INT64", 1038 exp.DataType.Type.NCHAR: "STRING", 1039 exp.DataType.Type.NVARCHAR: "STRING", 1040 exp.DataType.Type.SMALLINT: "INT64", 1041 exp.DataType.Type.TEXT: "STRING", 1042 exp.DataType.Type.TIMESTAMP: "DATETIME", 1043 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1044 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1045 exp.DataType.Type.TINYINT: "INT64", 1046 exp.DataType.Type.ROWVERSION: "BYTES", 1047 exp.DataType.Type.UUID: "STRING", 1048 exp.DataType.Type.VARBINARY: "BYTES", 1049 exp.DataType.Type.VARCHAR: "STRING", 1050 exp.DataType.Type.VARIANT: "ANY TYPE", 1051 } 1052 1053 PROPERTIES_LOCATION = { 1054 **generator.Generator.PROPERTIES_LOCATION, 1055 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1056 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1057 } 1058 1059 # WINDOW comes after QUALIFY 1060 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1061 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1062 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1063 "windows": 
generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1064 } 1065 1066 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1067 RESERVED_KEYWORDS = { 1068 "all", 1069 "and", 1070 "any", 1071 "array", 1072 "as", 1073 "asc", 1074 "assert_rows_modified", 1075 "at", 1076 "between", 1077 "by", 1078 "case", 1079 "cast", 1080 "collate", 1081 "contains", 1082 "create", 1083 "cross", 1084 "cube", 1085 "current", 1086 "default", 1087 "define", 1088 "desc", 1089 "distinct", 1090 "else", 1091 "end", 1092 "enum", 1093 "escape", 1094 "except", 1095 "exclude", 1096 "exists", 1097 "extract", 1098 "false", 1099 "fetch", 1100 "following", 1101 "for", 1102 "from", 1103 "full", 1104 "group", 1105 "grouping", 1106 "groups", 1107 "hash", 1108 "having", 1109 "if", 1110 "ignore", 1111 "in", 1112 "inner", 1113 "intersect", 1114 "interval", 1115 "into", 1116 "is", 1117 "join", 1118 "lateral", 1119 "left", 1120 "like", 1121 "limit", 1122 "lookup", 1123 "merge", 1124 "natural", 1125 "new", 1126 "no", 1127 "not", 1128 "null", 1129 "nulls", 1130 "of", 1131 "on", 1132 "or", 1133 "order", 1134 "outer", 1135 "over", 1136 "partition", 1137 "preceding", 1138 "proto", 1139 "qualify", 1140 "range", 1141 "recursive", 1142 "respect", 1143 "right", 1144 "rollup", 1145 "rows", 1146 "select", 1147 "set", 1148 "some", 1149 "struct", 1150 "tablesample", 1151 "then", 1152 "to", 1153 "treat", 1154 "true", 1155 "unbounded", 1156 "union", 1157 "unnest", 1158 "using", 1159 "when", 1160 "where", 1161 "window", 1162 "with", 1163 "within", 1164 } 1165 1166 def mod_sql(self, expression: exp.Mod) -> str: 1167 this = expression.this 1168 expr = expression.expression 1169 return self.func( 1170 "MOD", 1171 this.unnest() if isinstance(this, exp.Paren) else this, 1172 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1173 ) 1174 1175 def column_parts(self, expression: exp.Column) -> str: 1176 if expression.meta.get("quoted_column"): 1177 # If a column reference is of the form `dataset.table`.name, we need 1178 # to preserve the quoted table path, otherwise the reference breaks 1179 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1180 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1181 return f"{table_path}.{self.sql(expression, 'this')}" 1182 1183 return super().column_parts(expression) 1184 1185 def table_parts(self, expression: exp.Table) -> str: 1186 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1187 # we need to make sure the correct quoting is used in each case. 
1188 # 1189 # For example, if there is a CTE x that clashes with a schema name, then the former will 1190 # return the table y in that schema, whereas the latter will return the CTE's y column: 1191 # 1192 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1193 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1194 if expression.meta.get("quoted_table"): 1195 table_parts = ".".join(p.name for p in expression.parts) 1196 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1197 1198 return super().table_parts(expression) 1199 1200 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1201 this = expression.this 1202 if isinstance(this, exp.TsOrDsToDatetime): 1203 func_name = "FORMAT_DATETIME" 1204 elif isinstance(this, exp.TsOrDsToTimestamp): 1205 func_name = "FORMAT_TIMESTAMP" 1206 else: 1207 func_name = "FORMAT_DATE" 1208 1209 time_expr = ( 1210 this 1211 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1212 else expression 1213 ) 1214 return self.func( 1215 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1216 ) 1217 1218 def eq_sql(self, expression: exp.EQ) -> str: 1219 # Operands of = cannot be NULL in BigQuery 1220 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1221 if not isinstance(expression.parent, exp.Update): 1222 return "NULL" 1223 1224 return self.binary(expression, "=") 1225 1226 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1227 parent = expression.parent 1228 1229 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1230 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1231 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1232 return self.func( 1233 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1234 ) 1235 1236 return super().attimezone_sql(expression) 1237 1238 def trycast_sql(self, expression: exp.TryCast) -> str: 1239 return self.cast_sql(expression, safe_prefix="SAFE_") 1240 1241 def bracket_sql(self, expression: exp.Bracket) -> str: 1242 this = expression.this 1243 expressions = expression.expressions 1244 1245 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1246 arg = expressions[0] 1247 if arg.type is None: 1248 from sqlglot.optimizer.annotate_types import annotate_types 1249 1250 arg = annotate_types(arg, dialect=self.dialect) 1251 1252 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1253 # BQ doesn't support bracket syntax with string values for structs 1254 return f"{self.sql(this)}.{arg.name}" 1255 1256 expressions_sql = self.expressions(expression, flat=True) 1257 offset = expression.args.get("offset") 1258 1259 if offset == 0: 1260 expressions_sql = f"OFFSET({expressions_sql})" 1261 elif offset == 1: 1262 expressions_sql = f"ORDINAL({expressions_sql})" 1263 elif offset is not None: 1264 self.unsupported(f"Unsupported array offset: {offset}") 1265 1266 if expression.args.get("safe"): 1267 expressions_sql = f"SAFE_{expressions_sql}" 1268 1269 return f"{self.sql(this)}[{expressions_sql}]" 1270 1271 def in_unnest_op(self, expression: exp.Unnest) -> str: 1272 return self.sql(expression) 1273 1274 def version_sql(self, expression: exp.Version) -> str: 1275 if expression.name == "TIMESTAMP": 1276 expression.set("this", "SYSTEM_TIME") 1277 return super().version_sql(expression) 1278 1279 def contains_sql(self, 
expression: exp.Contains) -> str: 1280 this = expression.this 1281 expr = expression.expression 1282 1283 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1284 this = this.this 1285 expr = expr.this 1286 1287 return self.func("CONTAINS_SUBSTR", this, expr) 1288 1289 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1290 this = expression.this 1291 1292 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1293 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1294 # because they aren't literals and so the above syntax is invalid BigQuery. 1295 if isinstance(this, exp.Array): 1296 elem = seq_get(this.expressions, 0) 1297 if not (elem and elem.find(exp.Query)): 1298 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1299 1300 return super().cast_sql(expression, safe_prefix=safe_prefix)
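A short parsing sketch, assuming the public parse_one/sql helpers; the project, dataset and table names are placeholders.

import sqlglot

# Backtick-quoted multi-part table names and column-only UNNEST aliases are
# BigQuery-specific behaviors handled by this module (see _parse_table_parts
# and UNNEST_COLUMN_ONLY above).
ast = sqlglot.parse_one(
    "SELECT item FROM `my-project.my_dataset.my_table`, UNNEST(items) AS item",
    read="bigquery",
)
print(ast.sql(dialect="bigquery"))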
= exp.MakeInterval() 838 839 for arg_key in expr.arg_types: 840 value = self._parse_lambda() 841 842 if not value: 843 break 844 845 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 846 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 847 if isinstance(value, exp.Kwarg): 848 arg_key = value.this.name 849 850 expr.set(arg_key, value) 851 852 self._match(TokenType.COMMA) 853 854 return expr 855 856 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 857 expr = self.expression( 858 exp.FeaturesAtTime, 859 this=(self._match(TokenType.TABLE) and self._parse_table()) 860 or self._parse_select(nested=True), 861 ) 862 863 while self._match(TokenType.COMMA): 864 arg = self._parse_lambda() 865 866 # Get the LHS of the Kwarg and set the arg to that value, e.g 867 # "num_rows => 1" sets the expr's `num_rows` arg 868 if arg: 869 expr.set(arg.this.name, arg) 870 871 return expr 872 873 def _parse_export_data(self) -> exp.Export: 874 self._match_text_seq("DATA") 875 876 return self.expression( 877 exp.Export, 878 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 879 options=self._parse_properties(), 880 this=self._match_text_seq("AS") and self._parse_select(), 881 ) 882 883 class Generator(generator.Generator): 884 INTERVAL_ALLOWS_PLURAL_FORM = False 885 JOIN_HINTS = False 886 QUERY_HINTS = False 887 TABLE_HINTS = False 888 LIMIT_FETCH = "LIMIT" 889 RENAME_TABLE_WITH_DB = False 890 NVL2_SUPPORTED = False 891 UNNEST_WITH_ORDINALITY = False 892 COLLATE_IS_FUNC = True 893 LIMIT_ONLY_LITERALS = True 894 SUPPORTS_TABLE_ALIAS_COLUMNS = False 895 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 896 JSON_KEY_VALUE_PAIR_SEP = "," 897 NULL_ORDERING_SUPPORTED = False 898 IGNORE_NULLS_IN_FUNC = True 899 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 900 CAN_IMPLEMENT_ARRAY_ANY = True 901 SUPPORTS_TO_NUMBER = False 902 NAMED_PLACEHOLDER_TOKEN = "@" 903 HEX_FUNC = "TO_HEX" 904 WITH_PROPERTIES_PREFIX = "OPTIONS" 905 SUPPORTS_EXPLODING_PROJECTIONS = False 906 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 907 SUPPORTS_UNIX_SECONDS = True 908 909 TRANSFORMS = { 910 **generator.Generator.TRANSFORMS, 911 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 912 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 913 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 914 exp.Array: inline_array_unless_query, 915 exp.ArrayContains: _array_contains_sql, 916 exp.ArrayFilter: filter_array_using_unnest, 917 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 918 exp.CollateProperty: lambda self, e: ( 919 f"DEFAULT COLLATE {self.sql(e, 'this')}" 920 if e.args.get("default") 921 else f"COLLATE {self.sql(e, 'this')}" 922 ), 923 exp.Commit: lambda *_: "COMMIT TRANSACTION", 924 exp.CountIf: rename_func("COUNTIF"), 925 exp.Create: _create_sql, 926 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 927 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 928 exp.DateDiff: lambda self, e: self.func( 929 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 930 ), 931 exp.DateFromParts: rename_func("DATE"), 932 exp.DateStrToDate: datestrtodate_sql, 933 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 934 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 935 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 936 exp.DateTrunc: lambda self, e: self.func( 937 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 938 ), 939 exp.FromTimeZone: lambda self, e: self.func( 940 "DATETIME", self.func("TIMESTAMP", 
e.this, e.args.get("zone")), "'UTC'" 941 ), 942 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 943 exp.GroupConcat: lambda self, e: groupconcat_sql( 944 self, e, func_name="STRING_AGG", within_group=False 945 ), 946 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 947 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 948 exp.If: if_sql(false_value="NULL"), 949 exp.ILike: no_ilike_sql, 950 exp.IntDiv: rename_func("DIV"), 951 exp.Int64: rename_func("INT64"), 952 exp.JSONExtract: _json_extract_sql, 953 exp.JSONExtractArray: _json_extract_sql, 954 exp.JSONExtractScalar: _json_extract_sql, 955 exp.JSONFormat: rename_func("TO_JSON_STRING"), 956 exp.Levenshtein: _levenshtein_sql, 957 exp.Max: max_or_greatest, 958 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 959 exp.MD5Digest: rename_func("MD5"), 960 exp.Min: min_or_least, 961 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 962 exp.RegexpExtract: lambda self, e: self.func( 963 "REGEXP_EXTRACT", 964 e.this, 965 e.expression, 966 e.args.get("position"), 967 e.args.get("occurrence"), 968 ), 969 exp.RegexpExtractAll: lambda self, e: self.func( 970 "REGEXP_EXTRACT_ALL", e.this, e.expression 971 ), 972 exp.RegexpReplace: regexp_replace_sql, 973 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 974 exp.ReturnsProperty: _returnsproperty_sql, 975 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 976 exp.Select: transforms.preprocess( 977 [ 978 transforms.explode_projection_to_unnest(), 979 transforms.unqualify_unnest, 980 transforms.eliminate_distinct_on, 981 _alias_ordered_group, 982 transforms.eliminate_semi_and_anti_joins, 983 ] 984 ), 985 exp.SHA: rename_func("SHA1"), 986 exp.SHA2: sha256_sql, 987 exp.StabilityProperty: lambda self, e: ( 988 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 989 ), 990 exp.String: rename_func("STRING"), 991 exp.StrPosition: lambda self, e: ( 992 strposition_sql( 993 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 994 ) 995 ), 996 exp.StrToDate: _str_to_datetime_sql, 997 exp.StrToTime: _str_to_datetime_sql, 998 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 999 exp.TimeFromParts: rename_func("TIME"), 1000 exp.TimestampFromParts: rename_func("DATETIME"), 1001 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1002 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1003 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1004 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1005 exp.TimeStrToTime: timestrtotime_sql, 1006 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1007 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1008 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1009 exp.TsOrDsToTime: rename_func("TIME"), 1010 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1011 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1012 exp.Unhex: rename_func("FROM_HEX"), 1013 exp.UnixDate: rename_func("UNIX_DATE"), 1014 exp.UnixToTime: _unix_to_time_sql, 1015 exp.Uuid: lambda *_: "GENERATE_UUID()", 1016 exp.Values: _derived_table_values_to_unnest, 1017 exp.VariancePop: rename_func("VAR_POP"), 1018 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1019 } 1020 1021 SUPPORTED_JSON_PATH_PARTS = { 1022 exp.JSONPathKey, 1023 exp.JSONPathRoot, 1024 exp.JSONPathSubscript, 1025 } 1026 1027 TYPE_MAPPING = { 1028 **generator.Generator.TYPE_MAPPING, 1029 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1030 exp.DataType.Type.BIGINT: "INT64", 1031 exp.DataType.Type.BINARY: 
"BYTES", 1032 exp.DataType.Type.BLOB: "BYTES", 1033 exp.DataType.Type.BOOLEAN: "BOOL", 1034 exp.DataType.Type.CHAR: "STRING", 1035 exp.DataType.Type.DECIMAL: "NUMERIC", 1036 exp.DataType.Type.DOUBLE: "FLOAT64", 1037 exp.DataType.Type.FLOAT: "FLOAT64", 1038 exp.DataType.Type.INT: "INT64", 1039 exp.DataType.Type.NCHAR: "STRING", 1040 exp.DataType.Type.NVARCHAR: "STRING", 1041 exp.DataType.Type.SMALLINT: "INT64", 1042 exp.DataType.Type.TEXT: "STRING", 1043 exp.DataType.Type.TIMESTAMP: "DATETIME", 1044 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1045 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1046 exp.DataType.Type.TINYINT: "INT64", 1047 exp.DataType.Type.ROWVERSION: "BYTES", 1048 exp.DataType.Type.UUID: "STRING", 1049 exp.DataType.Type.VARBINARY: "BYTES", 1050 exp.DataType.Type.VARCHAR: "STRING", 1051 exp.DataType.Type.VARIANT: "ANY TYPE", 1052 } 1053 1054 PROPERTIES_LOCATION = { 1055 **generator.Generator.PROPERTIES_LOCATION, 1056 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1057 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1058 } 1059 1060 # WINDOW comes after QUALIFY 1061 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1062 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1063 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1064 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1065 } 1066 1067 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1068 RESERVED_KEYWORDS = { 1069 "all", 1070 "and", 1071 "any", 1072 "array", 1073 "as", 1074 "asc", 1075 "assert_rows_modified", 1076 "at", 1077 "between", 1078 "by", 1079 "case", 1080 "cast", 1081 "collate", 1082 "contains", 1083 "create", 1084 "cross", 1085 "cube", 1086 "current", 1087 "default", 1088 "define", 1089 "desc", 1090 "distinct", 1091 "else", 1092 "end", 1093 "enum", 1094 "escape", 1095 "except", 1096 "exclude", 1097 "exists", 1098 "extract", 1099 "false", 1100 "fetch", 1101 "following", 1102 "for", 1103 "from", 1104 "full", 1105 "group", 1106 "grouping", 1107 "groups", 1108 "hash", 1109 "having", 1110 "if", 1111 "ignore", 1112 "in", 1113 "inner", 1114 "intersect", 1115 "interval", 1116 "into", 1117 "is", 1118 "join", 1119 "lateral", 1120 "left", 1121 "like", 1122 "limit", 1123 "lookup", 1124 "merge", 1125 "natural", 1126 "new", 1127 "no", 1128 "not", 1129 "null", 1130 "nulls", 1131 "of", 1132 "on", 1133 "or", 1134 "order", 1135 "outer", 1136 "over", 1137 "partition", 1138 "preceding", 1139 "proto", 1140 "qualify", 1141 "range", 1142 "recursive", 1143 "respect", 1144 "right", 1145 "rollup", 1146 "rows", 1147 "select", 1148 "set", 1149 "some", 1150 "struct", 1151 "tablesample", 1152 "then", 1153 "to", 1154 "treat", 1155 "true", 1156 "unbounded", 1157 "union", 1158 "unnest", 1159 "using", 1160 "when", 1161 "where", 1162 "window", 1163 "with", 1164 "within", 1165 } 1166 1167 def mod_sql(self, expression: exp.Mod) -> str: 1168 this = expression.this 1169 expr = expression.expression 1170 return self.func( 1171 "MOD", 1172 this.unnest() if isinstance(this, exp.Paren) else this, 1173 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1174 ) 1175 1176 def column_parts(self, expression: exp.Column) -> str: 1177 if expression.meta.get("quoted_column"): 1178 # If a column reference is of the form `dataset.table`.name, we need 1179 # to preserve the quoted table path, otherwise the reference breaks 1180 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1181 table_path 
= self.sql(exp.Identifier(this=table_parts, quoted=True)) 1182 return f"{table_path}.{self.sql(expression, 'this')}" 1183 1184 return super().column_parts(expression) 1185 1186 def table_parts(self, expression: exp.Table) -> str: 1187 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1188 # we need to make sure the correct quoting is used in each case. 1189 # 1190 # For example, if there is a CTE x that clashes with a schema name, then the former will 1191 # return the table y in that schema, whereas the latter will return the CTE's y column: 1192 # 1193 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1194 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1195 if expression.meta.get("quoted_table"): 1196 table_parts = ".".join(p.name for p in expression.parts) 1197 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1198 1199 return super().table_parts(expression) 1200 1201 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1202 this = expression.this 1203 if isinstance(this, exp.TsOrDsToDatetime): 1204 func_name = "FORMAT_DATETIME" 1205 elif isinstance(this, exp.TsOrDsToTimestamp): 1206 func_name = "FORMAT_TIMESTAMP" 1207 else: 1208 func_name = "FORMAT_DATE" 1209 1210 time_expr = ( 1211 this 1212 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1213 else expression 1214 ) 1215 return self.func( 1216 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1217 ) 1218 1219 def eq_sql(self, expression: exp.EQ) -> str: 1220 # Operands of = cannot be NULL in BigQuery 1221 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1222 if not isinstance(expression.parent, exp.Update): 1223 return "NULL" 1224 1225 return self.binary(expression, "=") 1226 1227 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1228 parent = expression.parent 1229 1230 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1231 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1232 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1233 return self.func( 1234 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1235 ) 1236 1237 return super().attimezone_sql(expression) 1238 1239 def trycast_sql(self, expression: exp.TryCast) -> str: 1240 return self.cast_sql(expression, safe_prefix="SAFE_") 1241 1242 def bracket_sql(self, expression: exp.Bracket) -> str: 1243 this = expression.this 1244 expressions = expression.expressions 1245 1246 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1247 arg = expressions[0] 1248 if arg.type is None: 1249 from sqlglot.optimizer.annotate_types import annotate_types 1250 1251 arg = annotate_types(arg, dialect=self.dialect) 1252 1253 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1254 # BQ doesn't support bracket syntax with string values for structs 1255 return f"{self.sql(this)}.{arg.name}" 1256 1257 expressions_sql = self.expressions(expression, flat=True) 1258 offset = expression.args.get("offset") 1259 1260 if offset == 0: 1261 expressions_sql = f"OFFSET({expressions_sql})" 1262 elif offset == 1: 1263 expressions_sql = f"ORDINAL({expressions_sql})" 1264 elif offset is not None: 1265 self.unsupported(f"Unsupported array offset: {offset}") 1266 1267 if expression.args.get("safe"): 1268 expressions_sql = f"SAFE_{expressions_sql}" 1269 1270 return f"{self.sql(this)}[{expressions_sql}]" 1271 1272 def in_unnest_op(self, expression: exp.Unnest) -> str: 1273 return self.sql(expression) 1274 1275 def version_sql(self, expression: exp.Version) -> str: 1276 if expression.name == "TIMESTAMP": 1277 expression.set("this", "SYSTEM_TIME") 1278 return super().version_sql(expression) 1279 1280 def contains_sql(self, expression: exp.Contains) -> str: 1281 this = expression.this 1282 expr = expression.expression 1283 1284 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1285 this = this.this 1286 expr = expr.this 1287 1288 return self.func("CONTAINS_SUBSTR", this, expr) 1289 1290 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1291 this = expression.this 1292 1293 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1294 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1295 # because they aren't literals and so the above syntax is invalid BigQuery. 1296 if isinstance(this, exp.Array): 1297 elem = seq_get(this.expressions, 0) 1298 if not (elem and elem.find(exp.Query)): 1299 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1300 1301 return super().cast_sql(expression, safe_prefix=safe_prefix)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
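A minimal sketch (not part of the original docs; the query and dialect names are just illustrative) of observing this behavior through sqlglot's public qualify helper:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# Qualifying the same query under two dialects shows how the alias "my_id"
# is resolved differently in the GROUP BY/HAVING clauses.
for dialect in ("bigquery", "clickhouse"):
    expression = sqlglot.parse_one(sql, read=dialect)
    print(dialect, "->", qualify(expression, dialect=dialect).sql(dialect))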
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
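A hedged roundtrip sketch (illustrative only): since both functions share one AST node, preserving the original name is expected to let each spelling survive a BigQuery-to-BigQuery roundtrip.

import sqlglot

for fn in ("JSON_VALUE", "JSON_EXTRACT_SCALAR"):
    sql = f"SELECT {fn}(data, '$.name') FROM t"
    # Both calls parse into the same node; the original spelling is expected
    # to be kept when generating BigQuery SQL again.
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])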
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
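For instance (an illustrative sketch, not from the docs above), these mappings are what drive format-string translation when transpiling BigQuery date formatting into another dialect:

import sqlglot

# The '%Y-%m-%d' format is looked up through the dialect's time mappings so it
# can be rewritten into the target dialect's equivalent format tokens.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d) FROM t", read="bigquery", write="duckdb")[0])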
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
441 def normalize_identifier(self, expression: E) -> E: 442 if ( 443 isinstance(expression, exp.Identifier) 444 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 445 ): 446 parent = expression.parent 447 while isinstance(parent, exp.Dot): 448 parent = parent.parent 449 450 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 451 # by default. The following check uses a heuristic to detect tables based on whether 452 # they are qualified. This should generally be correct, because tables in BigQuery 453 # must be qualified with at least a dataset, unless @@dataset_id is set. 454 case_sensitive = ( 455 isinstance(parent, exp.UserDefinedFunction) 456 or ( 457 isinstance(parent, exp.Table) 458 and parent.db 459 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 460 ) 461 or expression.meta.get("is_table") 462 ) 463 if not case_sensitive: 464 expression.set("this", expression.this.lower()) 465 466 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
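A hedged example of the BigQuery heuristic above, using the public normalize_identifiers helper (the table and column names are made up):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expression = sqlglot.parse_one("SELECT Col FROM My_Project.MyDataset.MyTable", read="bigquery")

# Per the heuristic in the source above, qualified table parts are treated as
# case-sensitive and keep their casing, while other identifiers (like the
# column reference) are lowercased.
print(normalize_identifiers(expression, dialect="bigquery").sql("bigquery"))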
Mapping of an escaped sequence (such as the two-character string "\n") to its unescaped version (the corresponding literal character, e.g. an actual newline).
468 class Tokenizer(tokens.Tokenizer): 469 QUOTES = ["'", '"', '"""', "'''"] 470 COMMENTS = ["--", "#", ("/*", "*/")] 471 IDENTIFIERS = ["`"] 472 STRING_ESCAPES = ["\\"] 473 474 HEX_STRINGS = [("0x", ""), ("0X", "")] 475 476 BYTE_STRINGS = [ 477 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 478 ] 479 480 RAW_STRINGS = [ 481 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 482 ] 483 484 KEYWORDS = { 485 **tokens.Tokenizer.KEYWORDS, 486 "ANY TYPE": TokenType.VARIANT, 487 "BEGIN": TokenType.COMMAND, 488 "BEGIN TRANSACTION": TokenType.BEGIN, 489 "BYTEINT": TokenType.INT, 490 "BYTES": TokenType.BINARY, 491 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 492 "DATETIME": TokenType.TIMESTAMP, 493 "DECLARE": TokenType.COMMAND, 494 "ELSEIF": TokenType.COMMAND, 495 "EXCEPTION": TokenType.COMMAND, 496 "EXPORT": TokenType.EXPORT, 497 "FLOAT64": TokenType.DOUBLE, 498 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 499 "MODEL": TokenType.MODEL, 500 "NOT DETERMINISTIC": TokenType.VOLATILE, 501 "RECORD": TokenType.STRUCT, 502 "TIMESTAMP": TokenType.TIMESTAMPTZ, 503 } 504 KEYWORDS.pop("DIV") 505 KEYWORDS.pop("VALUES") 506 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
508 class Parser(parser.Parser): 509 PREFIXED_PIVOT_COLUMNS = True 510 LOG_DEFAULTS_TO_LN = True 511 SUPPORTS_IMPLICIT_UNNEST = True 512 513 FUNCTIONS = { 514 **parser.Parser.FUNCTIONS, 515 "CONTAINS_SUBSTR": _build_contains_substring, 516 "DATE": _build_date, 517 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 518 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 519 "DATE_TRUNC": lambda args: exp.DateTrunc( 520 unit=exp.Literal.string(str(seq_get(args, 1))), 521 this=seq_get(args, 0), 522 zone=seq_get(args, 2), 523 ), 524 "DATETIME": _build_datetime, 525 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 526 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 527 "DIV": binary_from_function(exp.IntDiv), 528 "EDIT_DISTANCE": _build_levenshtein, 529 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 530 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 531 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 532 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 533 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 534 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 535 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 536 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 537 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 538 "MD5": exp.MD5Digest.from_arg_list, 539 "TO_HEX": _build_to_hex, 540 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 541 [seq_get(args, 1), seq_get(args, 0)] 542 ), 543 "PARSE_TIMESTAMP": _build_parse_timestamp, 544 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 545 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 546 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 547 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 548 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 549 ), 550 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 551 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 552 "SPLIT": lambda args: exp.Split( 553 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 554 this=seq_get(args, 0), 555 expression=seq_get(args, 1) or exp.Literal.string(","), 556 ), 557 "STRPOS": exp.StrPosition.from_arg_list, 558 "TIME": _build_time, 559 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 560 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 561 "TIMESTAMP": _build_timestamp, 562 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 563 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 564 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 565 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 566 ), 567 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 568 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 569 ), 570 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 571 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 572 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 573 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 574 } 575 576 FUNCTION_PARSERS = { 577 **parser.Parser.FUNCTION_PARSERS, 578 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 579 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 580 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 581 } 582 FUNCTION_PARSERS.pop("TRIM") 583 584 NO_PAREN_FUNCTIONS = { 585 **parser.Parser.NO_PAREN_FUNCTIONS, 586 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 587 } 588 589 NESTED_TYPE_TOKENS = { 590 *parser.Parser.NESTED_TYPE_TOKENS, 591 TokenType.TABLE, 592 } 593 594 PROPERTY_PARSERS = { 595 **parser.Parser.PROPERTY_PARSERS, 596 "NOT DETERMINISTIC": lambda self: self.expression( 597 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 598 ), 599 "OPTIONS": lambda self: self._parse_with_property(), 600 } 601 602 CONSTRAINT_PARSERS = { 603 **parser.Parser.CONSTRAINT_PARSERS, 604 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 605 } 606 607 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 608 RANGE_PARSERS.pop(TokenType.OVERLAPS) 609 610 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 611 612 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 613 614 STATEMENT_PARSERS = { 615 **parser.Parser.STATEMENT_PARSERS, 616 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 617 TokenType.END: lambda self: self._parse_as_command(self._prev), 618 TokenType.FOR: lambda self: self._parse_for_in(), 619 TokenType.EXPORT: lambda self: self._parse_export_data(), 620 } 621 622 BRACKET_OFFSETS = { 623 "OFFSET": (0, False), 624 "ORDINAL": (1, False), 625 "SAFE_OFFSET": (0, True), 626 "SAFE_ORDINAL": (1, True), 627 } 628 629 def _parse_for_in(self) -> exp.ForIn: 630 this = self._parse_range() 631 self._match_text_seq("DO") 632 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 633 634 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 635 this = super()._parse_table_part(schema=schema) or self._parse_number() 636 637 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 638 if isinstance(this, exp.Identifier): 639 table_name = this.name 640 while self._match(TokenType.DASH, advance=False) and self._next: 641 start = self._curr 642 while self._is_connected() and not self._match_set( 643 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 644 ): 645 self._advance() 646 647 table_name += self._find_sql(start, self._prev) 648 649 this = exp.Identifier( 650 this=table_name, quoted=this.args.get("quoted") 651 ).update_positions(this) 652 elif isinstance(this, exp.Literal): 653 table_name = this.name 654 655 if self._is_connected() and self._parse_var(any_token=True): 656 table_name += self._prev.text 657 658 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 659 660 return this 661 662 def _parse_table_parts( 663 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 664 ) -> exp.Table: 665 table = super()._parse_table_parts( 666 schema=schema, is_db_reference=is_db_reference, wildcard=True 667 ) 668 669 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 670 if not table.catalog: 671 if table.db: 672 previous_db = table.args["db"] 673 parts = table.db.split(".") 674 if len(parts) == 2 and not table.args["db"].quoted: 675 table.set( 676 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 677 ) 678 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 679 else: 680 previous_this = table.this 681 parts = table.name.split(".") 682 if len(parts) == 2 and not table.this.quoted: 683 table.set( 684 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 685 ) 686 table.set( 
687 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 688 ) 689 690 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 691 alias = table.this 692 catalog, db, this, *rest = ( 693 exp.to_identifier(p, quoted=True) 694 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 695 ) 696 697 for part in (catalog, db, this): 698 if part: 699 part.update_positions(table.this) 700 701 if rest and this: 702 this = exp.Dot.build([this, *rest]) # type: ignore 703 704 table = exp.Table( 705 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 706 ) 707 table.meta["quoted_table"] = True 708 else: 709 alias = None 710 711 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 712 # dataset, so if the project identifier is omitted we need to fix the ast so that 713 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 714 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 715 # views, because it would seem like the "catalog" part is set, when it'd actually 716 # be the region/dataset. Merging the two identifiers into a single one is done to 717 # avoid producing a 4-part Table reference, which would cause issues in the schema 718 # module, when there are 3-part table names mixed with information schema views. 719 # 720 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 721 table_parts = table.parts 722 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 723 # We need to alias the table here to avoid breaking existing qualified columns. 724 # This is expected to be safe, because if there's an actual alias coming up in 725 # the token stream, it will overwrite this one. If there isn't one, we are only 726 # exposing the name that can be used to reference the view explicitly (a no-op). 727 exp.alias_( 728 table, 729 t.cast(exp.Identifier, alias or table_parts[-1]), 730 table=True, 731 copy=False, 732 ) 733 734 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 735 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 736 line=table_parts[-2].meta.get("line"), 737 col=table_parts[-1].meta.get("col"), 738 start=table_parts[-2].meta.get("start"), 739 end=table_parts[-1].meta.get("end"), 740 ) 741 table.set("this", new_this) 742 table.set("db", seq_get(table_parts, -3)) 743 table.set("catalog", seq_get(table_parts, -4)) 744 745 return table 746 747 def _parse_column(self) -> t.Optional[exp.Expression]: 748 column = super()._parse_column() 749 if isinstance(column, exp.Column): 750 parts = column.parts 751 if any("." in p.name for p in parts): 752 catalog, db, table, this, *rest = ( 753 exp.to_identifier(p, quoted=True) 754 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 755 ) 756 757 if rest and this: 758 this = exp.Dot.build([this, *rest]) # type: ignore 759 760 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 761 column.meta["quoted_column"] = True 762 763 return column 764 765 @t.overload 766 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 767 768 @t.overload 769 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
770 771 def _parse_json_object(self, agg=False): 772 json_object = super()._parse_json_object() 773 array_kv_pair = seq_get(json_object.expressions, 0) 774 775 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 776 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 777 if ( 778 array_kv_pair 779 and isinstance(array_kv_pair.this, exp.Array) 780 and isinstance(array_kv_pair.expression, exp.Array) 781 ): 782 keys = array_kv_pair.this.expressions 783 values = array_kv_pair.expression.expressions 784 785 json_object.set( 786 "expressions", 787 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 788 ) 789 790 return json_object 791 792 def _parse_bracket( 793 self, this: t.Optional[exp.Expression] = None 794 ) -> t.Optional[exp.Expression]: 795 bracket = super()._parse_bracket(this) 796 797 if this is bracket: 798 return bracket 799 800 if isinstance(bracket, exp.Bracket): 801 for expression in bracket.expressions: 802 name = expression.name.upper() 803 804 if name not in self.BRACKET_OFFSETS: 805 break 806 807 offset, safe = self.BRACKET_OFFSETS[name] 808 bracket.set("offset", offset) 809 bracket.set("safe", safe) 810 expression.replace(expression.expressions[0]) 811 812 return bracket 813 814 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 815 unnest = super()._parse_unnest(with_alias=with_alias) 816 817 if not unnest: 818 return None 819 820 unnest_expr = seq_get(unnest.expressions, 0) 821 if unnest_expr: 822 from sqlglot.optimizer.annotate_types import annotate_types 823 824 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 825 826 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 827 # in contrast to other dialects such as DuckDB which flattens only the array by default 828 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 829 array_elem.is_type(exp.DataType.Type.STRUCT) 830 for array_elem in unnest_expr._type.expressions 831 ): 832 unnest.set("explode_array", True) 833 834 return unnest 835 836 def _parse_make_interval(self) -> exp.MakeInterval: 837 expr = exp.MakeInterval() 838 839 for arg_key in expr.arg_types: 840 value = self._parse_lambda() 841 842 if not value: 843 break 844 845 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 846 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 847 if isinstance(value, exp.Kwarg): 848 arg_key = value.this.name 849 850 expr.set(arg_key, value) 851 852 self._match(TokenType.COMMA) 853 854 return expr 855 856 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 857 expr = self.expression( 858 exp.FeaturesAtTime, 859 this=(self._match(TokenType.TABLE) and self._parse_table()) 860 or self._parse_select(nested=True), 861 ) 862 863 while self._match(TokenType.COMMA): 864 arg = self._parse_lambda() 865 866 # Get the LHS of the Kwarg and set the arg to that value, e.g 867 # "num_rows => 1" sets the expr's `num_rows` arg 868 if arg: 869 expr.set(arg.this.name, arg) 870 871 return expr 872 873 def _parse_export_data(self) -> exp.Export: 874 self._match_text_seq("DATA") 875 876 return self.expression( 877 exp.Export, 878 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 879 options=self._parse_properties(), 880 this=self._match_text_seq("AS") and self._parse_select(), 881 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
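A brief usage sketch (the sample query and options are illustrative): parser options such as error_level can be passed straight through the top-level API.

import sqlglot
from sqlglot import exp
from sqlglot.errors import ErrorLevel

expression = sqlglot.parse_one(
    "SELECT TIMESTAMP_MILLIS(1672531200000) AS ts",
    read="bigquery",
    error_level=ErrorLevel.RAISE,  # raise instead of collecting errors
)

# Per the FUNCTIONS mapping above, TIMESTAMP_MILLIS is parsed into exp.UnixToTime.
print(expression.find(exp.UnixToTime))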
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
883 class Generator(generator.Generator): 884 INTERVAL_ALLOWS_PLURAL_FORM = False 885 JOIN_HINTS = False 886 QUERY_HINTS = False 887 TABLE_HINTS = False 888 LIMIT_FETCH = "LIMIT" 889 RENAME_TABLE_WITH_DB = False 890 NVL2_SUPPORTED = False 891 UNNEST_WITH_ORDINALITY = False 892 COLLATE_IS_FUNC = True 893 LIMIT_ONLY_LITERALS = True 894 SUPPORTS_TABLE_ALIAS_COLUMNS = False 895 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 896 JSON_KEY_VALUE_PAIR_SEP = "," 897 NULL_ORDERING_SUPPORTED = False 898 IGNORE_NULLS_IN_FUNC = True 899 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 900 CAN_IMPLEMENT_ARRAY_ANY = True 901 SUPPORTS_TO_NUMBER = False 902 NAMED_PLACEHOLDER_TOKEN = "@" 903 HEX_FUNC = "TO_HEX" 904 WITH_PROPERTIES_PREFIX = "OPTIONS" 905 SUPPORTS_EXPLODING_PROJECTIONS = False 906 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 907 SUPPORTS_UNIX_SECONDS = True 908 909 TRANSFORMS = { 910 **generator.Generator.TRANSFORMS, 911 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 912 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 913 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 914 exp.Array: inline_array_unless_query, 915 exp.ArrayContains: _array_contains_sql, 916 exp.ArrayFilter: filter_array_using_unnest, 917 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 918 exp.CollateProperty: lambda self, e: ( 919 f"DEFAULT COLLATE {self.sql(e, 'this')}" 920 if e.args.get("default") 921 else f"COLLATE {self.sql(e, 'this')}" 922 ), 923 exp.Commit: lambda *_: "COMMIT TRANSACTION", 924 exp.CountIf: rename_func("COUNTIF"), 925 exp.Create: _create_sql, 926 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 927 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 928 exp.DateDiff: lambda self, e: self.func( 929 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 930 ), 931 exp.DateFromParts: rename_func("DATE"), 932 exp.DateStrToDate: datestrtodate_sql, 933 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 934 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 935 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 936 exp.DateTrunc: lambda self, e: self.func( 937 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 938 ), 939 exp.FromTimeZone: lambda self, e: self.func( 940 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 941 ), 942 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 943 exp.GroupConcat: lambda self, e: groupconcat_sql( 944 self, e, func_name="STRING_AGG", within_group=False 945 ), 946 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 947 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 948 exp.If: if_sql(false_value="NULL"), 949 exp.ILike: no_ilike_sql, 950 exp.IntDiv: rename_func("DIV"), 951 exp.Int64: rename_func("INT64"), 952 exp.JSONExtract: _json_extract_sql, 953 exp.JSONExtractArray: _json_extract_sql, 954 exp.JSONExtractScalar: _json_extract_sql, 955 exp.JSONFormat: rename_func("TO_JSON_STRING"), 956 exp.Levenshtein: _levenshtein_sql, 957 exp.Max: max_or_greatest, 958 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 959 exp.MD5Digest: rename_func("MD5"), 960 exp.Min: min_or_least, 961 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 962 exp.RegexpExtract: lambda self, e: self.func( 963 "REGEXP_EXTRACT", 964 e.this, 965 e.expression, 966 e.args.get("position"), 967 e.args.get("occurrence"), 968 ), 969 exp.RegexpExtractAll: lambda self, e: self.func( 970 "REGEXP_EXTRACT_ALL", e.this, 
e.expression 971 ), 972 exp.RegexpReplace: regexp_replace_sql, 973 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 974 exp.ReturnsProperty: _returnsproperty_sql, 975 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 976 exp.Select: transforms.preprocess( 977 [ 978 transforms.explode_projection_to_unnest(), 979 transforms.unqualify_unnest, 980 transforms.eliminate_distinct_on, 981 _alias_ordered_group, 982 transforms.eliminate_semi_and_anti_joins, 983 ] 984 ), 985 exp.SHA: rename_func("SHA1"), 986 exp.SHA2: sha256_sql, 987 exp.StabilityProperty: lambda self, e: ( 988 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 989 ), 990 exp.String: rename_func("STRING"), 991 exp.StrPosition: lambda self, e: ( 992 strposition_sql( 993 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 994 ) 995 ), 996 exp.StrToDate: _str_to_datetime_sql, 997 exp.StrToTime: _str_to_datetime_sql, 998 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 999 exp.TimeFromParts: rename_func("TIME"), 1000 exp.TimestampFromParts: rename_func("DATETIME"), 1001 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1002 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1003 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1004 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1005 exp.TimeStrToTime: timestrtotime_sql, 1006 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1007 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1008 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1009 exp.TsOrDsToTime: rename_func("TIME"), 1010 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1011 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1012 exp.Unhex: rename_func("FROM_HEX"), 1013 exp.UnixDate: rename_func("UNIX_DATE"), 1014 exp.UnixToTime: _unix_to_time_sql, 1015 exp.Uuid: lambda *_: "GENERATE_UUID()", 1016 exp.Values: _derived_table_values_to_unnest, 1017 exp.VariancePop: rename_func("VAR_POP"), 1018 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1019 } 1020 1021 SUPPORTED_JSON_PATH_PARTS = { 1022 exp.JSONPathKey, 1023 exp.JSONPathRoot, 1024 exp.JSONPathSubscript, 1025 } 1026 1027 TYPE_MAPPING = { 1028 **generator.Generator.TYPE_MAPPING, 1029 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1030 exp.DataType.Type.BIGINT: "INT64", 1031 exp.DataType.Type.BINARY: "BYTES", 1032 exp.DataType.Type.BLOB: "BYTES", 1033 exp.DataType.Type.BOOLEAN: "BOOL", 1034 exp.DataType.Type.CHAR: "STRING", 1035 exp.DataType.Type.DECIMAL: "NUMERIC", 1036 exp.DataType.Type.DOUBLE: "FLOAT64", 1037 exp.DataType.Type.FLOAT: "FLOAT64", 1038 exp.DataType.Type.INT: "INT64", 1039 exp.DataType.Type.NCHAR: "STRING", 1040 exp.DataType.Type.NVARCHAR: "STRING", 1041 exp.DataType.Type.SMALLINT: "INT64", 1042 exp.DataType.Type.TEXT: "STRING", 1043 exp.DataType.Type.TIMESTAMP: "DATETIME", 1044 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1045 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1046 exp.DataType.Type.TINYINT: "INT64", 1047 exp.DataType.Type.ROWVERSION: "BYTES", 1048 exp.DataType.Type.UUID: "STRING", 1049 exp.DataType.Type.VARBINARY: "BYTES", 1050 exp.DataType.Type.VARCHAR: "STRING", 1051 exp.DataType.Type.VARIANT: "ANY TYPE", 1052 } 1053 1054 PROPERTIES_LOCATION = { 1055 **generator.Generator.PROPERTIES_LOCATION, 1056 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1057 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1058 } 1059 1060 # WINDOW comes after QUALIFY 1061 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1062 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1063 "qualify": 
generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1064 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1065 } 1066 1067 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1068 RESERVED_KEYWORDS = { 1069 "all", 1070 "and", 1071 "any", 1072 "array", 1073 "as", 1074 "asc", 1075 "assert_rows_modified", 1076 "at", 1077 "between", 1078 "by", 1079 "case", 1080 "cast", 1081 "collate", 1082 "contains", 1083 "create", 1084 "cross", 1085 "cube", 1086 "current", 1087 "default", 1088 "define", 1089 "desc", 1090 "distinct", 1091 "else", 1092 "end", 1093 "enum", 1094 "escape", 1095 "except", 1096 "exclude", 1097 "exists", 1098 "extract", 1099 "false", 1100 "fetch", 1101 "following", 1102 "for", 1103 "from", 1104 "full", 1105 "group", 1106 "grouping", 1107 "groups", 1108 "hash", 1109 "having", 1110 "if", 1111 "ignore", 1112 "in", 1113 "inner", 1114 "intersect", 1115 "interval", 1116 "into", 1117 "is", 1118 "join", 1119 "lateral", 1120 "left", 1121 "like", 1122 "limit", 1123 "lookup", 1124 "merge", 1125 "natural", 1126 "new", 1127 "no", 1128 "not", 1129 "null", 1130 "nulls", 1131 "of", 1132 "on", 1133 "or", 1134 "order", 1135 "outer", 1136 "over", 1137 "partition", 1138 "preceding", 1139 "proto", 1140 "qualify", 1141 "range", 1142 "recursive", 1143 "respect", 1144 "right", 1145 "rollup", 1146 "rows", 1147 "select", 1148 "set", 1149 "some", 1150 "struct", 1151 "tablesample", 1152 "then", 1153 "to", 1154 "treat", 1155 "true", 1156 "unbounded", 1157 "union", 1158 "unnest", 1159 "using", 1160 "when", 1161 "where", 1162 "window", 1163 "with", 1164 "within", 1165 } 1166 1167 def mod_sql(self, expression: exp.Mod) -> str: 1168 this = expression.this 1169 expr = expression.expression 1170 return self.func( 1171 "MOD", 1172 this.unnest() if isinstance(this, exp.Paren) else this, 1173 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1174 ) 1175 1176 def column_parts(self, expression: exp.Column) -> str: 1177 if expression.meta.get("quoted_column"): 1178 # If a column reference is of the form `dataset.table`.name, we need 1179 # to preserve the quoted table path, otherwise the reference breaks 1180 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1181 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1182 return f"{table_path}.{self.sql(expression, 'this')}" 1183 1184 return super().column_parts(expression) 1185 1186 def table_parts(self, expression: exp.Table) -> str: 1187 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1188 # we need to make sure the correct quoting is used in each case. 
1189 # 1190 # For example, if there is a CTE x that clashes with a schema name, then the former will 1191 # return the table y in that schema, whereas the latter will return the CTE's y column: 1192 # 1193 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1194 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1195 if expression.meta.get("quoted_table"): 1196 table_parts = ".".join(p.name for p in expression.parts) 1197 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1198 1199 return super().table_parts(expression) 1200 1201 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1202 this = expression.this 1203 if isinstance(this, exp.TsOrDsToDatetime): 1204 func_name = "FORMAT_DATETIME" 1205 elif isinstance(this, exp.TsOrDsToTimestamp): 1206 func_name = "FORMAT_TIMESTAMP" 1207 else: 1208 func_name = "FORMAT_DATE" 1209 1210 time_expr = ( 1211 this 1212 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1213 else expression 1214 ) 1215 return self.func( 1216 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1217 ) 1218 1219 def eq_sql(self, expression: exp.EQ) -> str: 1220 # Operands of = cannot be NULL in BigQuery 1221 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1222 if not isinstance(expression.parent, exp.Update): 1223 return "NULL" 1224 1225 return self.binary(expression, "=") 1226 1227 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1228 parent = expression.parent 1229 1230 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1231 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1232 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1233 return self.func( 1234 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1235 ) 1236 1237 return super().attimezone_sql(expression) 1238 1239 def trycast_sql(self, expression: exp.TryCast) -> str: 1240 return self.cast_sql(expression, safe_prefix="SAFE_") 1241 1242 def bracket_sql(self, expression: exp.Bracket) -> str: 1243 this = expression.this 1244 expressions = expression.expressions 1245 1246 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1247 arg = expressions[0] 1248 if arg.type is None: 1249 from sqlglot.optimizer.annotate_types import annotate_types 1250 1251 arg = annotate_types(arg, dialect=self.dialect) 1252 1253 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1254 # BQ doesn't support bracket syntax with string values for structs 1255 return f"{self.sql(this)}.{arg.name}" 1256 1257 expressions_sql = self.expressions(expression, flat=True) 1258 offset = expression.args.get("offset") 1259 1260 if offset == 0: 1261 expressions_sql = f"OFFSET({expressions_sql})" 1262 elif offset == 1: 1263 expressions_sql = f"ORDINAL({expressions_sql})" 1264 elif offset is not None: 1265 self.unsupported(f"Unsupported array offset: {offset}") 1266 1267 if expression.args.get("safe"): 1268 expressions_sql = f"SAFE_{expressions_sql}" 1269 1270 return f"{self.sql(this)}[{expressions_sql}]" 1271 1272 def in_unnest_op(self, expression: exp.Unnest) -> str: 1273 return self.sql(expression) 1274 1275 def version_sql(self, expression: exp.Version) -> str: 1276 if expression.name == "TIMESTAMP": 1277 expression.set("this", "SYSTEM_TIME") 1278 return super().version_sql(expression) 1279 1280 def contains_sql(self, 
expression: exp.Contains) -> str: 1281 this = expression.this 1282 expr = expression.expression 1283 1284 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1285 this = this.this 1286 expr = expr.this 1287 1288 return self.func("CONTAINS_SUBSTR", this, expr) 1289 1290 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1291 this = expression.this 1292 1293 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1294 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1295 # because they aren't literals and so the above syntax is invalid BigQuery. 1296 if isinstance(this, exp.Array): 1297 elem = seq_get(this.expressions, 0) 1298 if not (elem and elem.find(exp.Query)): 1299 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1300 1301 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
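These options are usually passed through the top-level helpers rather than by constructing the generator directly. A minimal sketch of pretty-printing with quoted identifiers (the query itself is illustrative):

import sqlglot

# `pretty` and `identify` are forwarded to the BigQuery generator.
sql = "with x as (select 1 as y) select y from x"
print(sqlglot.transpile(sql, write="bigquery", pretty=True, identify=True)[0])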
def column_parts(self, expression: exp.Column) -> str:
    if expression.meta.get("quoted_column"):
        # If a column reference is of the form `dataset.table`.name, we need
        # to preserve the quoted table path, otherwise the reference breaks
        table_parts = ".".join(p.name for p in expression.parts[:-1])
        table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
        return f"{table_path}.{self.sql(expression, 'this')}"

    return super().column_parts(expression)
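A minimal sketch of the quoted-column-path behavior described in the comment above; the project, dataset, and table names are made up, and the roundtrip is expected (not guaranteed) to preserve the quoting:

import sqlglot

# The quoted table path `my-project.dataset.tbl` should stay a single quoted identifier.
sql = "SELECT `my-project.dataset.tbl`.col FROM `my-project.dataset.tbl`"
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))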
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`    -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`  -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
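The two queries from the comment can be roundtripped to check that each keeps its original quoting, since they resolve to different data sources. A minimal sketch:

import sqlglot

# `x.y` (one quoted identifier) and `x`.`y` (two identifiers) should not be conflated.
for sql in (
    "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`",
    "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`",
):
    print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))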
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    this = expression.this
    if isinstance(this, exp.TsOrDsToDatetime):
        func_name = "FORMAT_DATETIME"
    elif isinstance(this, exp.TsOrDsToTimestamp):
        func_name = "FORMAT_TIMESTAMP"
    else:
        func_name = "FORMAT_DATE"

    time_expr = (
        this
        if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
        else expression
    )
    return self.func(
        func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
    )
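A rough illustration of the function selection: each FORMAT_* call should keep its name on a BigQuery-to-BigQuery roundtrip, since the generator re-derives it from the wrapped expression type (outputs may differ slightly in literal formatting):

import sqlglot

for sql in (
    "SELECT FORMAT_DATE('%Y%m%d', DATE '2023-01-01')",
    "SELECT FORMAT_DATETIME('%Y%m%d', DATETIME '2023-01-01 00:00:00')",
    "SELECT FORMAT_TIMESTAMP('%Y%m%d', TIMESTAMP '2023-01-01 00:00:00+00')",
):
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])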
def eq_sql(self, expression: exp.EQ) -> str:
    # Operands of = cannot be NULL in BigQuery
    if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
        if not isinstance(expression.parent, exp.Update):
            return "NULL"

    return self.binary(expression, "=")
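A small sketch of the NULL-operand rule: outside of an UPDATE, an equality against NULL is generated as a bare NULL (the table and column names are illustrative):

import sqlglot

# The EQ node with a NULL operand collapses to NULL in the generated SQL.
print(sqlglot.transpile("SELECT a = NULL FROM t", write="bigquery")[0])
# expected to print something like: SELECT NULL FROM t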
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
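A minimal sketch of the AT TIME ZONE rewrite, reading from PostgreSQL where the operator is native; the exact output may vary:

import sqlglot

# Outside the CAST ... FORMAT case, AT TIME ZONE becomes nested DATETIME/TIMESTAMP calls.
print(sqlglot.transpile("SELECT x AT TIME ZONE 'UTC'", read="postgres", write="bigquery")[0])
# expected to print something like: SELECT TIMESTAMP(DATETIME(x, 'UTC'))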
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
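A sketch of the array subscript wrappers handled above; each form should survive a BigQuery-to-BigQuery roundtrip (arr and t are illustrative names):

import sqlglot

# OFFSET/ORDINAL/SAFE_* subscripts map to the offset/safe args on exp.Bracket.
for sql in (
    "SELECT arr[OFFSET(0)] FROM t",
    "SELECT arr[ORDINAL(1)] FROM t",
    "SELECT arr[SAFE_OFFSET(0)] FROM t",
):
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])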
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
    # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
    # because they aren't literals and so the above syntax is invalid BigQuery.
    if isinstance(this, exp.Array):
        elem = seq_get(this.expressions, 0)
        if not (elem and elem.find(exp.Query)):
            return f"{self.sql(expression, 'to')}{self.sql(this)}"

    return super().cast_sql(expression, safe_prefix=safe_prefix)
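The typed array literal mentioned in the comment can be checked with a simple roundtrip; a minimal sketch:

import sqlglot

# ARRAY<INT64>[1, 2, 3] should come back unchanged rather than as a CAST.
print(sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0])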
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql