sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY y + 1
# ORDER BY z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression
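# Illustrative sketch (not part of the module): the workaround above swaps a grouped
# expression for its SELECT alias so BigQuery can ORDER BY it. The exact output below
# is an assumption and may vary across sqlglot versions.
#
#   >>> import sqlglot
#   >>> sqlglot.transpile(
#   ...     "WITH x AS (SELECT 1 y) SELECT y + 1 z FROM x GROUP BY y + 1 ORDER BY z",
#   ...     read="bigquery",
#   ...     write="bigquery",
#   ... )[0]
#   'WITH x AS (SELECT 1 AS y) SELECT y + 1 AS z FROM x GROUP BY z ORDER BY z'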
def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)
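# Illustrative sketch (assumption): _build_time dispatches on arity, mirroring
# BigQuery's overloaded TIME() signatures; _build_datetime below follows the same
# pattern for DATETIME().
#
#   >>> import sqlglot
#   >>> node = sqlglot.parse_one("SELECT TIME(15, 30, 0)", read="bigquery")
#   >>> node.find(sqlglot.exp.TimeFromParts) is not None
#   True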
def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$', i.e. all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR, etc. follow this return type convention:
    +---------+---------+---------+------------+---------+
    | INPUT   | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder
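# Illustrative sketch (assumption): single-argument JSON extraction functions get the
# implicit '$' path during parsing (see the builder above), so the default is made
# explicit on output. Exact output may vary by sqlglot version.
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT JSON_EXTRACT_SCALAR(j)", read="bigquery", write="bigquery")[0]
#   "SELECT JSON_EXTRACT_SCALAR(j, '$')"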
def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated
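# Illustrative sketch (assumption): the two-argument CONTAINS_SUBSTR is parsed into a
# case-insensitive exp.Contains by lowercasing both operands (see the builder above).
#
#   >>> import sqlglot
#   >>> ast = sqlglot.parse_one("SELECT CONTAINS_SUBSTR('Hello', 'ELL')", read="bigquery")
#   >>> isinstance(ast.find(sqlglot.exp.Contains).this, sqlglot.exp.Lower)
#   True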
class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
    }
    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: _annotate_concat,
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)
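    # Illustrative sketch (assumption): an unqualified identifier has no table/UDF
    # parent, so the case-insensitive path above lowercases it.
    #
    #   >>> from sqlglot import exp
    #   >>> from sqlglot.dialects import BigQuery
    #   >>> BigQuery().normalize_identifier(exp.to_identifier("MyCol")).name
    #   'mycol'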
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }
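        # Illustrative sketch (assumption): mapping BigQuery functions such as
        # TIMESTAMP_MILLIS onto canonical expressions (here exp.UnixToTime) is what
        # enables cross-dialect output. The DuckDB rendering shown is an assumption
        # and may vary across sqlglot versions.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT TIMESTAMP_MILLIS(1)", read="bigquery", write="duckdb")[0]
        #   'SELECT EPOCH_MS(1)'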
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())
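        # Illustrative sketch (assumption): BRACKET_OFFSETS lets _parse_bracket further
        # below fold OFFSET/ORDINAL/SAFE_* wrappers into the Bracket node's `offset`
        # and `safe` args, so 1-indexed dialects can re-index. Output is an assumption
        # and may vary by version.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT arr[SAFE_OFFSET(0)]", read="bigquery", write="duckdb")[0]
        #   'SELECT arr[1]'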
        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table
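        # Illustrative sketch (assumption): dashed project names parse into a single
        # catalog identifier rather than an arithmetic expression.
        #
        #   >>> import sqlglot
        #   >>> tbl = sqlglot.parse_one(
        #   ...     "SELECT * FROM my-project.dataset.tbl", read="bigquery"
        #   ... ).find(sqlglot.exp.Table)
        #   >>> tbl.catalog, tbl.db, tbl.name
        #   ('my-project', 'dataset', 'tbl')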
        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object
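        # Illustrative sketch (assumption): BigQuery's array-pair signature of
        # JSON_OBJECT is normalized into explicit key/value pairs, so it round-trips
        # in canonical form. Exact output may vary by sqlglot version.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT JSON_OBJECT(['a', 'b'], [1, 2])", read="bigquery", write="bigquery"
        #   ... )[0]
        #   "SELECT JSON_OBJECT('a', 1, 'b', 2)"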
        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e. an array of structs) explodes the top-level
                # struct fields, in contrast to other dialects such as DuckDB, which flattens
                # only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named
                # arguments that can appear in any order, e.g. MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g.
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )
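        # Illustrative sketch (assumptions: the slot names/order of exp.MakeInterval):
        # _parse_make_interval above fills positional arguments sequentially, then
        # applies kwargs by name, so MAKE_INTERVAL(1, minute => 5, day => 2) should
        # populate the year, minute and day slots.
        #
        #   >>> import sqlglot
        #   >>> node = sqlglot.parse_one(
        #   ...     "SELECT MAKE_INTERVAL(1, minute => 5, day => 2)", read="bigquery"
        #   ... ).find(sqlglot.exp.MakeInterval)
        #   >>> sorted(node.args)
        #   ['day', 'minute', 'year']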
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func(
                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
            ),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }
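        # Illustrative sketch (assumption): TRANSFORMS rewrites canonical expressions
        # into BigQuery spellings, e.g. exp.CountIf renders as COUNTIF.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT COUNT_IF(x > 0)", write="bigquery")[0]
        #   'SELECT COUNTIF(x > 0)'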
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BLOB: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }
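        # Illustrative sketch (assumption): TYPE_MAPPING drives how CAST targets are
        # rendered, e.g. generic VARCHAR becomes STRING.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT CAST(x AS VARCHAR)", write="bigquery")[0]
        #   'SELECT CAST(x AS STRING)'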
        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
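        # Illustrative sketch (assumption): trycast_sql above prefixes CAST with SAFE_,
        # and TYPE_MAPPING converts the target type.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT TRY_CAST(x AS BIGINT)", write="bigquery")[0]
        #   'SELECT SAFE_CAST(x AS INT64)'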
        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)
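        # Illustrative sketch (assumption): the Array special case in cast_sql keeps
        # typed array literals intact on a BigQuery -> BigQuery round trip.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0]
        #   'SELECT ARRAY<INT64>[1, 2, 3]'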
e.text("unit"), e.args.get("zone") 955 ), 956 exp.FromTimeZone: lambda self, e: self.func( 957 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 958 ), 959 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 960 exp.GroupConcat: lambda self, e: groupconcat_sql( 961 self, e, func_name="STRING_AGG", within_group=False 962 ), 963 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 964 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 965 exp.If: if_sql(false_value="NULL"), 966 exp.ILike: no_ilike_sql, 967 exp.IntDiv: rename_func("DIV"), 968 exp.Int64: rename_func("INT64"), 969 exp.JSONExtract: _json_extract_sql, 970 exp.JSONExtractArray: _json_extract_sql, 971 exp.JSONExtractScalar: _json_extract_sql, 972 exp.JSONFormat: rename_func("TO_JSON_STRING"), 973 exp.Levenshtein: _levenshtein_sql, 974 exp.Max: max_or_greatest, 975 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 976 exp.MD5Digest: rename_func("MD5"), 977 exp.Min: min_or_least, 978 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 979 exp.RegexpExtract: lambda self, e: self.func( 980 "REGEXP_EXTRACT", 981 e.this, 982 e.expression, 983 e.args.get("position"), 984 e.args.get("occurrence"), 985 ), 986 exp.RegexpExtractAll: lambda self, e: self.func( 987 "REGEXP_EXTRACT_ALL", e.this, e.expression 988 ), 989 exp.RegexpReplace: regexp_replace_sql, 990 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 991 exp.ReturnsProperty: _returnsproperty_sql, 992 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 993 exp.Select: transforms.preprocess( 994 [ 995 transforms.explode_projection_to_unnest(), 996 transforms.unqualify_unnest, 997 transforms.eliminate_distinct_on, 998 _alias_ordered_group, 999 transforms.eliminate_semi_and_anti_joins, 1000 ] 1001 ), 1002 exp.SHA: rename_func("SHA1"), 1003 exp.SHA2: sha256_sql, 1004 exp.StabilityProperty: lambda self, e: ( 1005 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1006 ), 1007 exp.String: rename_func("STRING"), 1008 exp.StrPosition: lambda self, e: ( 1009 strposition_sql( 1010 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1011 ) 1012 ), 1013 exp.StrToDate: _str_to_datetime_sql, 1014 exp.StrToTime: _str_to_datetime_sql, 1015 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1016 exp.TimeFromParts: rename_func("TIME"), 1017 exp.TimestampFromParts: rename_func("DATETIME"), 1018 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1019 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1020 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1021 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1022 exp.TimeStrToTime: timestrtotime_sql, 1023 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1024 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1025 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1026 exp.TsOrDsToTime: rename_func("TIME"), 1027 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1028 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1029 exp.Unhex: rename_func("FROM_HEX"), 1030 exp.UnixDate: rename_func("UNIX_DATE"), 1031 exp.UnixToTime: _unix_to_time_sql, 1032 exp.Uuid: lambda *_: "GENERATE_UUID()", 1033 exp.Values: _derived_table_values_to_unnest, 1034 exp.VariancePop: rename_func("VAR_POP"), 1035 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1036 } 1037 1038 SUPPORTED_JSON_PATH_PARTS = { 1039 exp.JSONPathKey, 1040 exp.JSONPathRoot, 1041 exp.JSONPathSubscript, 1042 } 1043 1044 TYPE_MAPPING = { 1045 
**generator.Generator.TYPE_MAPPING, 1046 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1047 exp.DataType.Type.BIGINT: "INT64", 1048 exp.DataType.Type.BINARY: "BYTES", 1049 exp.DataType.Type.BLOB: "BYTES", 1050 exp.DataType.Type.BOOLEAN: "BOOL", 1051 exp.DataType.Type.CHAR: "STRING", 1052 exp.DataType.Type.DECIMAL: "NUMERIC", 1053 exp.DataType.Type.DOUBLE: "FLOAT64", 1054 exp.DataType.Type.FLOAT: "FLOAT64", 1055 exp.DataType.Type.INT: "INT64", 1056 exp.DataType.Type.NCHAR: "STRING", 1057 exp.DataType.Type.NVARCHAR: "STRING", 1058 exp.DataType.Type.SMALLINT: "INT64", 1059 exp.DataType.Type.TEXT: "STRING", 1060 exp.DataType.Type.TIMESTAMP: "DATETIME", 1061 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1062 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1063 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1064 exp.DataType.Type.TINYINT: "INT64", 1065 exp.DataType.Type.ROWVERSION: "BYTES", 1066 exp.DataType.Type.UUID: "STRING", 1067 exp.DataType.Type.VARBINARY: "BYTES", 1068 exp.DataType.Type.VARCHAR: "STRING", 1069 exp.DataType.Type.VARIANT: "ANY TYPE", 1070 } 1071 1072 PROPERTIES_LOCATION = { 1073 **generator.Generator.PROPERTIES_LOCATION, 1074 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1075 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1076 } 1077 1078 # WINDOW comes after QUALIFY 1079 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1080 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1081 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1082 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1083 } 1084 1085 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1086 RESERVED_KEYWORDS = { 1087 "all", 1088 "and", 1089 "any", 1090 "array", 1091 "as", 1092 "asc", 1093 "assert_rows_modified", 1094 "at", 1095 "between", 1096 "by", 1097 "case", 1098 "cast", 1099 "collate", 1100 "contains", 1101 "create", 1102 "cross", 1103 "cube", 1104 "current", 1105 "default", 1106 "define", 1107 "desc", 1108 "distinct", 1109 "else", 1110 "end", 1111 "enum", 1112 "escape", 1113 "except", 1114 "exclude", 1115 "exists", 1116 "extract", 1117 "false", 1118 "fetch", 1119 "following", 1120 "for", 1121 "from", 1122 "full", 1123 "group", 1124 "grouping", 1125 "groups", 1126 "hash", 1127 "having", 1128 "if", 1129 "ignore", 1130 "in", 1131 "inner", 1132 "intersect", 1133 "interval", 1134 "into", 1135 "is", 1136 "join", 1137 "lateral", 1138 "left", 1139 "like", 1140 "limit", 1141 "lookup", 1142 "merge", 1143 "natural", 1144 "new", 1145 "no", 1146 "not", 1147 "null", 1148 "nulls", 1149 "of", 1150 "on", 1151 "or", 1152 "order", 1153 "outer", 1154 "over", 1155 "partition", 1156 "preceding", 1157 "proto", 1158 "qualify", 1159 "range", 1160 "recursive", 1161 "respect", 1162 "right", 1163 "rollup", 1164 "rows", 1165 "select", 1166 "set", 1167 "some", 1168 "struct", 1169 "tablesample", 1170 "then", 1171 "to", 1172 "treat", 1173 "true", 1174 "unbounded", 1175 "union", 1176 "unnest", 1177 "using", 1178 "when", 1179 "where", 1180 "window", 1181 "with", 1182 "within", 1183 } 1184 1185 def mod_sql(self, expression: exp.Mod) -> str: 1186 this = expression.this 1187 expr = expression.expression 1188 return self.func( 1189 "MOD", 1190 this.unnest() if isinstance(this, exp.Paren) else this, 1191 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1192 ) 1193 1194 def column_parts(self, expression: exp.Column) -> str: 1195 if expression.meta.get("quoted_column"): 1196 # If a column 
reference is of the form `dataset.table`.name, we need 1197 # to preserve the quoted table path, otherwise the reference breaks 1198 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1199 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1200 return f"{table_path}.{self.sql(expression, 'this')}" 1201 1202 return super().column_parts(expression) 1203 1204 def table_parts(self, expression: exp.Table) -> str: 1205 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1206 # we need to make sure the correct quoting is used in each case. 1207 # 1208 # For example, if there is a CTE x that clashes with a schema name, then the former will 1209 # return the table y in that schema, whereas the latter will return the CTE's y column: 1210 # 1211 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1212 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1213 if expression.meta.get("quoted_table"): 1214 table_parts = ".".join(p.name for p in expression.parts) 1215 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1216 1217 return super().table_parts(expression) 1218 1219 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1220 this = expression.this 1221 if isinstance(this, exp.TsOrDsToDatetime): 1222 func_name = "FORMAT_DATETIME" 1223 elif isinstance(this, exp.TsOrDsToTimestamp): 1224 func_name = "FORMAT_TIMESTAMP" 1225 else: 1226 func_name = "FORMAT_DATE" 1227 1228 time_expr = ( 1229 this 1230 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1231 else expression 1232 ) 1233 return self.func( 1234 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1235 ) 1236 1237 def eq_sql(self, expression: exp.EQ) -> str: 1238 # Operands of = cannot be NULL in BigQuery 1239 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1240 if not isinstance(expression.parent, exp.Update): 1241 return "NULL" 1242 1243 return self.binary(expression, "=") 1244 1245 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1246 parent = expression.parent 1247 1248 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1249 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1250 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1251 return self.func( 1252 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1253 ) 1254 1255 return super().attimezone_sql(expression) 1256 1257 def trycast_sql(self, expression: exp.TryCast) -> str: 1258 return self.cast_sql(expression, safe_prefix="SAFE_") 1259 1260 def bracket_sql(self, expression: exp.Bracket) -> str: 1261 this = expression.this 1262 expressions = expression.expressions 1263 1264 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1265 arg = expressions[0] 1266 if arg.type is None: 1267 from sqlglot.optimizer.annotate_types import annotate_types 1268 1269 arg = annotate_types(arg, dialect=self.dialect) 1270 1271 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1272 # BQ doesn't support bracket syntax with string values for structs 1273 return f"{self.sql(this)}.{arg.name}" 1274 1275 expressions_sql = self.expressions(expression, flat=True) 1276 offset = expression.args.get("offset") 1277 1278 if offset == 0: 1279 expressions_sql = f"OFFSET({expressions_sql})" 1280 elif offset == 1: 1281 expressions_sql = f"ORDINAL({expressions_sql})" 1282 elif offset is not None: 1283 self.unsupported(f"Unsupported array offset: {offset}") 1284 1285 if expression.args.get("safe"): 1286 expressions_sql = f"SAFE_{expressions_sql}" 1287 1288 return f"{self.sql(this)}[{expressions_sql}]" 1289 1290 def in_unnest_op(self, expression: exp.Unnest) -> str: 1291 return self.sql(expression) 1292 1293 def version_sql(self, expression: exp.Version) -> str: 1294 if expression.name == "TIMESTAMP": 1295 expression.set("this", "SYSTEM_TIME") 1296 return super().version_sql(expression) 1297 1298 def contains_sql(self, expression: exp.Contains) -> str: 1299 this = expression.this 1300 expr = expression.expression 1301 1302 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1303 this = this.this 1304 expr = expr.this 1305 1306 return self.func("CONTAINS_SUBSTR", this, expr) 1307 1308 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1309 this = expression.this 1310 1311 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1312 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1313 # because they aren't literals and so the above syntax is invalid BigQuery. 1314 if isinstance(this, exp.Array): 1315 elem = seq_get(this.expressions, 0) 1316 if not (elem and elem.find(exp.Query)): 1317 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1318 1319 return super().cast_sql(expression, safe_prefix=safe_prefix)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
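A minimal sketch of what this offset relates to (output may vary across sqlglot versions): BigQuery's DATE_TRUNC takes the date first and the unit second, and its weeks start on Sunday rather than the Monday default used by most dialects.

import sqlglot

# Hedged round-trip sketch: the argument order is flipped when transpiling to
# a dialect whose DATE_TRUNC takes the unit first.
print(sqlglot.transpile("SELECT DATE_TRUNC(d, WEEK)", read="bigquery", write="duckdb"))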
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
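A hedged sketch of the flag's effect: Hive's LOG(base, x) puts the base first, while BigQuery's LOG(x, base) puts it second, so transpiling between the two should swap the arguments.

import sqlglot

# Both forms mean "log base 10 of 100"; the argument order should be swapped
# on the way out (exact output may vary by sqlglot version).
print(sqlglot.transpile("SELECT LOG(10, 100)", read="hive", write="bigquery"))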
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
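A minimal sketch using the optimizer's qualify pass on the example above; under the BigQuery dialect, the WHERE reference should keep pointing at data.my_id while the GROUP BY/HAVING references resolve to the projection alias.

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""
# Qualify under BigQuery semantics and print the resolved query.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))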
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
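A short sketch of that BigQuery example: both spellings parse to the same node, and the preserved name is what lets each one round-trip unchanged.

import sqlglot
from sqlglot import exp

for fn in ("JSON_VALUE", "JSON_EXTRACT_SCALAR"):
    e = sqlglot.parse_one(f"SELECT {fn}(j, '$.x')", read="bigquery")
    # Same AST node for both, but generation should keep the original spelling.
    print(e.find(exp.JSONExtractScalar) is not None, e.sql("bigquery"))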
Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
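A minimal sketch of the mapping in action: BigQuery's %-style format tokens are translated through TIME_MAPPING when its parsing functions are rewritten for another dialect.

import sqlglot

# PARSE_TIMESTAMP should become the target dialect's string-to-time parsing
# function (e.g. DuckDB's STRPTIME), with the format string converted.
print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%Y-%m-%d', s)", read="bigquery", write="duckdb"))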
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
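A short sketch: BigQuery requires the explicit keyword after UNION, whereas DuckDB's UNION is DISTINCT by default, so the keyword can be dropped on the way out.

import sqlglot

# The explicit DISTINCT is expected to disappear in the DuckDB output.
print(sqlglot.transpile("SELECT 1 UNION DISTINCT SELECT 2", read="bigquery", write="duckdb"))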
453 def normalize_identifier(self, expression: E) -> E: 454 if ( 455 isinstance(expression, exp.Identifier) 456 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 457 ): 458 parent = expression.parent 459 while isinstance(parent, exp.Dot): 460 parent = parent.parent 461 462 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 463 # by default. The following check uses a heuristic to detect tables based on whether 464 # they are qualified. This should generally be correct, because tables in BigQuery 465 # must be qualified with at least a dataset, unless @@dataset_id is set. 466 case_sensitive = ( 467 isinstance(parent, exp.UserDefinedFunction) 468 or ( 469 isinstance(parent, exp.Table) 470 and parent.db 471 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 472 ) 473 or expression.meta.get("is_table") 474 ) 475 if not case_sensitive: 476 expression.set("this", expression.this.lower()) 477 478 return t.cast(E, expression) 479 480 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect. For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
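A minimal usage sketch: under BigQuery's case-insensitive strategy, an unquoted, unqualified identifier (e.g. a CTE name) is lowercased by the heuristic above, while quoted or table-qualified names are left alone.

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

# Expected to print "foo": the bare identifier is treated as case-insensitive.
print(BigQuery().normalize_identifier(exp.to_identifier("FoO")).sql("bigquery"))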
Mapping of an escaped sequence (e.g. the two-character sequence \n) to its unescaped version (the corresponding literal character, such as a newline).
482 class Tokenizer(tokens.Tokenizer): 483 QUOTES = ["'", '"', '"""', "'''"] 484 COMMENTS = ["--", "#", ("/*", "*/")] 485 IDENTIFIERS = ["`"] 486 STRING_ESCAPES = ["\\"] 487 488 HEX_STRINGS = [("0x", ""), ("0X", "")] 489 490 BYTE_STRINGS = [ 491 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 492 ] 493 494 RAW_STRINGS = [ 495 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 496 ] 497 498 KEYWORDS = { 499 **tokens.Tokenizer.KEYWORDS, 500 "ANY TYPE": TokenType.VARIANT, 501 "BEGIN": TokenType.COMMAND, 502 "BEGIN TRANSACTION": TokenType.BEGIN, 503 "BYTEINT": TokenType.INT, 504 "BYTES": TokenType.BINARY, 505 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 506 "DATETIME": TokenType.TIMESTAMP, 507 "DECLARE": TokenType.COMMAND, 508 "ELSEIF": TokenType.COMMAND, 509 "EXCEPTION": TokenType.COMMAND, 510 "EXPORT": TokenType.EXPORT, 511 "FLOAT64": TokenType.DOUBLE, 512 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 513 "MODEL": TokenType.MODEL, 514 "NOT DETERMINISTIC": TokenType.VOLATILE, 515 "RECORD": TokenType.STRUCT, 516 "TIMESTAMP": TokenType.TIMESTAMPTZ, 517 } 518 KEYWORDS.pop("DIV") 519 KEYWORDS.pop("VALUES") 520 KEYWORDS.pop("/*+")
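A short tokenization sketch covering a few of the BigQuery-specific settings defined above: backtick identifiers, raw strings, and 0x hex literals.

import sqlglot

# Inspect the token stream produced for BigQuery-flavored SQL.
for token in sqlglot.tokenize("SELECT r'a', 0xCC FROM `proj.ds.t`", read="bigquery"):
    print(token.token_type, repr(token.text))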
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
522 class Parser(parser.Parser): 523 PREFIXED_PIVOT_COLUMNS = True 524 LOG_DEFAULTS_TO_LN = True 525 SUPPORTS_IMPLICIT_UNNEST = True 526 527 FUNCTIONS = { 528 **parser.Parser.FUNCTIONS, 529 "CONTAINS_SUBSTR": _build_contains_substring, 530 "DATE": _build_date, 531 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 532 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 533 "DATE_TRUNC": lambda args: exp.DateTrunc( 534 unit=exp.Literal.string(str(seq_get(args, 1))), 535 this=seq_get(args, 0), 536 zone=seq_get(args, 2), 537 ), 538 "DATETIME": _build_datetime, 539 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 540 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 541 "DIV": binary_from_function(exp.IntDiv), 542 "EDIT_DISTANCE": _build_levenshtein, 543 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 544 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 545 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 546 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 547 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 548 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 549 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 550 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 551 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 552 "MD5": exp.MD5Digest.from_arg_list, 553 "TO_HEX": _build_to_hex, 554 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 555 [seq_get(args, 1), seq_get(args, 0)] 556 ), 557 "PARSE_TIMESTAMP": _build_parse_timestamp, 558 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 559 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 560 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 561 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 562 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 563 ), 564 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 565 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 566 "SPLIT": lambda args: exp.Split( 567 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 568 this=seq_get(args, 0), 569 expression=seq_get(args, 1) or exp.Literal.string(","), 570 ), 571 "STRPOS": exp.StrPosition.from_arg_list, 572 "TIME": _build_time, 573 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 574 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 575 "TIMESTAMP": _build_timestamp, 576 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 577 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 578 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 579 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 580 ), 581 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 582 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 583 ), 584 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 585 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 586 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 587 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 588 } 589 590 FUNCTION_PARSERS = { 591 **parser.Parser.FUNCTION_PARSERS, 592 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 593 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 594 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 595 } 596 FUNCTION_PARSERS.pop("TRIM") 597 598 NO_PAREN_FUNCTIONS = { 599 **parser.Parser.NO_PAREN_FUNCTIONS, 600 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 601 } 602 603 NESTED_TYPE_TOKENS = { 604 *parser.Parser.NESTED_TYPE_TOKENS, 605 TokenType.TABLE, 606 } 607 608 PROPERTY_PARSERS = { 609 **parser.Parser.PROPERTY_PARSERS, 610 "NOT DETERMINISTIC": lambda self: self.expression( 611 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 612 ), 613 "OPTIONS": lambda self: self._parse_with_property(), 614 } 615 616 CONSTRAINT_PARSERS = { 617 **parser.Parser.CONSTRAINT_PARSERS, 618 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 619 } 620 621 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 622 RANGE_PARSERS.pop(TokenType.OVERLAPS) 623 624 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 625 626 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 627 628 STATEMENT_PARSERS = { 629 **parser.Parser.STATEMENT_PARSERS, 630 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 631 TokenType.END: lambda self: self._parse_as_command(self._prev), 632 TokenType.FOR: lambda self: self._parse_for_in(), 633 TokenType.EXPORT: lambda self: self._parse_export_data(), 634 } 635 636 BRACKET_OFFSETS = { 637 "OFFSET": (0, False), 638 "ORDINAL": (1, False), 639 "SAFE_OFFSET": (0, True), 640 "SAFE_ORDINAL": (1, True), 641 } 642 643 def _parse_for_in(self) -> exp.ForIn: 644 this = self._parse_range() 645 self._match_text_seq("DO") 646 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 647 648 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 649 this = super()._parse_table_part(schema=schema) or self._parse_number() 650 651 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 652 if isinstance(this, exp.Identifier): 653 table_name = this.name 654 while self._match(TokenType.DASH, advance=False) and self._next: 655 start = self._curr 656 while self._is_connected() and not self._match_set( 657 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 658 ): 659 self._advance() 660 661 if start == self._curr: 662 break 663 664 table_name += self._find_sql(start, self._prev) 665 666 this = exp.Identifier( 667 this=table_name, quoted=this.args.get("quoted") 668 ).update_positions(this) 669 elif isinstance(this, exp.Literal): 670 table_name = this.name 671 672 if self._is_connected() and self._parse_var(any_token=True): 673 table_name += self._prev.text 674 675 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 676 677 return this 678 679 def _parse_table_parts( 680 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 681 ) -> exp.Table: 682 table = super()._parse_table_parts( 683 schema=schema, is_db_reference=is_db_reference, wildcard=True 684 ) 685 686 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 687 if not table.catalog: 688 if table.db: 689 previous_db = table.args["db"] 690 parts = table.db.split(".") 691 if len(parts) == 2 and not table.args["db"].quoted: 692 table.set( 693 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 694 ) 695 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 696 else: 697 previous_this = table.this 698 parts = table.name.split(".") 699 if len(parts) == 2 and not table.this.quoted: 700 table.set( 701 "db", 
exp.Identifier(this=parts[0]).update_positions(previous_this) 702 ) 703 table.set( 704 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 705 ) 706 707 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 708 alias = table.this 709 catalog, db, this, *rest = ( 710 exp.to_identifier(p, quoted=True) 711 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 712 ) 713 714 for part in (catalog, db, this): 715 if part: 716 part.update_positions(table.this) 717 718 if rest and this: 719 this = exp.Dot.build([this, *rest]) # type: ignore 720 721 table = exp.Table( 722 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 723 ) 724 table.meta["quoted_table"] = True 725 else: 726 alias = None 727 728 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 729 # dataset, so if the project identifier is omitted we need to fix the ast so that 730 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 731 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 732 # views, because it would seem like the "catalog" part is set, when it'd actually 733 # be the region/dataset. Merging the two identifiers into a single one is done to 734 # avoid producing a 4-part Table reference, which would cause issues in the schema 735 # module, when there are 3-part table names mixed with information schema views. 736 # 737 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 738 table_parts = table.parts 739 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 740 # We need to alias the table here to avoid breaking existing qualified columns. 741 # This is expected to be safe, because if there's an actual alias coming up in 742 # the token stream, it will overwrite this one. If there isn't one, we are only 743 # exposing the name that can be used to reference the view explicitly (a no-op). 744 exp.alias_( 745 table, 746 t.cast(exp.Identifier, alias or table_parts[-1]), 747 table=True, 748 copy=False, 749 ) 750 751 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 752 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 753 line=table_parts[-2].meta.get("line"), 754 col=table_parts[-1].meta.get("col"), 755 start=table_parts[-2].meta.get("start"), 756 end=table_parts[-1].meta.get("end"), 757 ) 758 table.set("this", new_this) 759 table.set("db", seq_get(table_parts, -3)) 760 table.set("catalog", seq_get(table_parts, -4)) 761 762 return table 763 764 def _parse_column(self) -> t.Optional[exp.Expression]: 765 column = super()._parse_column() 766 if isinstance(column, exp.Column): 767 parts = column.parts 768 if any("." in p.name for p in parts): 769 catalog, db, table, this, *rest = ( 770 exp.to_identifier(p, quoted=True) 771 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 772 ) 773 774 if rest and this: 775 this = exp.Dot.build([this, *rest]) # type: ignore 776 777 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 778 column.meta["quoted_column"] = True 779 780 return column 781 782 @t.overload 783 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 784 785 @t.overload 786 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
787 788 def _parse_json_object(self, agg=False): 789 json_object = super()._parse_json_object() 790 array_kv_pair = seq_get(json_object.expressions, 0) 791 792 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 793 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 794 if ( 795 array_kv_pair 796 and isinstance(array_kv_pair.this, exp.Array) 797 and isinstance(array_kv_pair.expression, exp.Array) 798 ): 799 keys = array_kv_pair.this.expressions 800 values = array_kv_pair.expression.expressions 801 802 json_object.set( 803 "expressions", 804 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 805 ) 806 807 return json_object 808 809 def _parse_bracket( 810 self, this: t.Optional[exp.Expression] = None 811 ) -> t.Optional[exp.Expression]: 812 bracket = super()._parse_bracket(this) 813 814 if this is bracket: 815 return bracket 816 817 if isinstance(bracket, exp.Bracket): 818 for expression in bracket.expressions: 819 name = expression.name.upper() 820 821 if name not in self.BRACKET_OFFSETS: 822 break 823 824 offset, safe = self.BRACKET_OFFSETS[name] 825 bracket.set("offset", offset) 826 bracket.set("safe", safe) 827 expression.replace(expression.expressions[0]) 828 829 return bracket 830 831 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 832 unnest = super()._parse_unnest(with_alias=with_alias) 833 834 if not unnest: 835 return None 836 837 unnest_expr = seq_get(unnest.expressions, 0) 838 if unnest_expr: 839 from sqlglot.optimizer.annotate_types import annotate_types 840 841 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 842 843 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 844 # in contrast to other dialects such as DuckDB which flattens only the array by default 845 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 846 array_elem.is_type(exp.DataType.Type.STRUCT) 847 for array_elem in unnest_expr._type.expressions 848 ): 849 unnest.set("explode_array", True) 850 851 return unnest 852 853 def _parse_make_interval(self) -> exp.MakeInterval: 854 expr = exp.MakeInterval() 855 856 for arg_key in expr.arg_types: 857 value = self._parse_lambda() 858 859 if not value: 860 break 861 862 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 863 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 864 if isinstance(value, exp.Kwarg): 865 arg_key = value.this.name 866 867 expr.set(arg_key, value) 868 869 self._match(TokenType.COMMA) 870 871 return expr 872 873 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 874 expr = self.expression( 875 exp.FeaturesAtTime, 876 this=(self._match(TokenType.TABLE) and self._parse_table()) 877 or self._parse_select(nested=True), 878 ) 879 880 while self._match(TokenType.COMMA): 881 arg = self._parse_lambda() 882 883 # Get the LHS of the Kwarg and set the arg to that value, e.g 884 # "num_rows => 1" sets the expr's `num_rows` arg 885 if arg: 886 expr.set(arg.this.name, arg) 887 888 return expr 889 890 def _parse_export_data(self) -> exp.Export: 891 self._match_text_seq("DATA") 892 893 return self.expression( 894 exp.Export, 895 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 896 options=self._parse_properties(), 897 this=self._match_text_seq("AS") and self._parse_select(), 898 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
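A minimal parsing sketch for the dash-separated table names handled by _parse_table_part above; the dashed project name is glued back into a single identifier and survives a round-trip.

import sqlglot

# Dashed project names are BigQuery-specific syntax.
print(sqlglot.parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery").sql("bigquery"))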
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
900 class Generator(generator.Generator): 901 INTERVAL_ALLOWS_PLURAL_FORM = False 902 JOIN_HINTS = False 903 QUERY_HINTS = False 904 TABLE_HINTS = False 905 LIMIT_FETCH = "LIMIT" 906 RENAME_TABLE_WITH_DB = False 907 NVL2_SUPPORTED = False 908 UNNEST_WITH_ORDINALITY = False 909 COLLATE_IS_FUNC = True 910 LIMIT_ONLY_LITERALS = True 911 SUPPORTS_TABLE_ALIAS_COLUMNS = False 912 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 913 JSON_KEY_VALUE_PAIR_SEP = "," 914 NULL_ORDERING_SUPPORTED = False 915 IGNORE_NULLS_IN_FUNC = True 916 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 917 CAN_IMPLEMENT_ARRAY_ANY = True 918 SUPPORTS_TO_NUMBER = False 919 NAMED_PLACEHOLDER_TOKEN = "@" 920 HEX_FUNC = "TO_HEX" 921 WITH_PROPERTIES_PREFIX = "OPTIONS" 922 SUPPORTS_EXPLODING_PROJECTIONS = False 923 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 924 SUPPORTS_UNIX_SECONDS = True 925 926 TRANSFORMS = { 927 **generator.Generator.TRANSFORMS, 928 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 929 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 930 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 931 exp.Array: inline_array_unless_query, 932 exp.ArrayContains: _array_contains_sql, 933 exp.ArrayFilter: filter_array_using_unnest, 934 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 935 exp.CollateProperty: lambda self, e: ( 936 f"DEFAULT COLLATE {self.sql(e, 'this')}" 937 if e.args.get("default") 938 else f"COLLATE {self.sql(e, 'this')}" 939 ), 940 exp.Commit: lambda *_: "COMMIT TRANSACTION", 941 exp.CountIf: rename_func("COUNTIF"), 942 exp.Create: _create_sql, 943 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 944 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 945 exp.DateDiff: lambda self, e: self.func( 946 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 947 ), 948 exp.DateFromParts: rename_func("DATE"), 949 exp.DateStrToDate: datestrtodate_sql, 950 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 951 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 952 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 953 exp.DateTrunc: lambda self, e: self.func( 954 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 955 ), 956 exp.FromTimeZone: lambda self, e: self.func( 957 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 958 ), 959 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 960 exp.GroupConcat: lambda self, e: groupconcat_sql( 961 self, e, func_name="STRING_AGG", within_group=False 962 ), 963 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 964 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 965 exp.If: if_sql(false_value="NULL"), 966 exp.ILike: no_ilike_sql, 967 exp.IntDiv: rename_func("DIV"), 968 exp.Int64: rename_func("INT64"), 969 exp.JSONExtract: _json_extract_sql, 970 exp.JSONExtractArray: _json_extract_sql, 971 exp.JSONExtractScalar: _json_extract_sql, 972 exp.JSONFormat: rename_func("TO_JSON_STRING"), 973 exp.Levenshtein: _levenshtein_sql, 974 exp.Max: max_or_greatest, 975 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 976 exp.MD5Digest: rename_func("MD5"), 977 exp.Min: min_or_least, 978 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 979 exp.RegexpExtract: lambda self, e: self.func( 980 "REGEXP_EXTRACT", 981 e.this, 982 e.expression, 983 e.args.get("position"), 984 e.args.get("occurrence"), 985 ), 986 exp.RegexpExtractAll: lambda self, e: self.func( 987 "REGEXP_EXTRACT_ALL", e.this, 
e.expression 988 ), 989 exp.RegexpReplace: regexp_replace_sql, 990 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 991 exp.ReturnsProperty: _returnsproperty_sql, 992 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 993 exp.Select: transforms.preprocess( 994 [ 995 transforms.explode_projection_to_unnest(), 996 transforms.unqualify_unnest, 997 transforms.eliminate_distinct_on, 998 _alias_ordered_group, 999 transforms.eliminate_semi_and_anti_joins, 1000 ] 1001 ), 1002 exp.SHA: rename_func("SHA1"), 1003 exp.SHA2: sha256_sql, 1004 exp.StabilityProperty: lambda self, e: ( 1005 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1006 ), 1007 exp.String: rename_func("STRING"), 1008 exp.StrPosition: lambda self, e: ( 1009 strposition_sql( 1010 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1011 ) 1012 ), 1013 exp.StrToDate: _str_to_datetime_sql, 1014 exp.StrToTime: _str_to_datetime_sql, 1015 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1016 exp.TimeFromParts: rename_func("TIME"), 1017 exp.TimestampFromParts: rename_func("DATETIME"), 1018 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1019 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1020 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1021 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1022 exp.TimeStrToTime: timestrtotime_sql, 1023 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1024 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1025 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1026 exp.TsOrDsToTime: rename_func("TIME"), 1027 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1028 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1029 exp.Unhex: rename_func("FROM_HEX"), 1030 exp.UnixDate: rename_func("UNIX_DATE"), 1031 exp.UnixToTime: _unix_to_time_sql, 1032 exp.Uuid: lambda *_: "GENERATE_UUID()", 1033 exp.Values: _derived_table_values_to_unnest, 1034 exp.VariancePop: rename_func("VAR_POP"), 1035 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1036 } 1037 1038 SUPPORTED_JSON_PATH_PARTS = { 1039 exp.JSONPathKey, 1040 exp.JSONPathRoot, 1041 exp.JSONPathSubscript, 1042 } 1043 1044 TYPE_MAPPING = { 1045 **generator.Generator.TYPE_MAPPING, 1046 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1047 exp.DataType.Type.BIGINT: "INT64", 1048 exp.DataType.Type.BINARY: "BYTES", 1049 exp.DataType.Type.BLOB: "BYTES", 1050 exp.DataType.Type.BOOLEAN: "BOOL", 1051 exp.DataType.Type.CHAR: "STRING", 1052 exp.DataType.Type.DECIMAL: "NUMERIC", 1053 exp.DataType.Type.DOUBLE: "FLOAT64", 1054 exp.DataType.Type.FLOAT: "FLOAT64", 1055 exp.DataType.Type.INT: "INT64", 1056 exp.DataType.Type.NCHAR: "STRING", 1057 exp.DataType.Type.NVARCHAR: "STRING", 1058 exp.DataType.Type.SMALLINT: "INT64", 1059 exp.DataType.Type.TEXT: "STRING", 1060 exp.DataType.Type.TIMESTAMP: "DATETIME", 1061 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1062 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1063 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1064 exp.DataType.Type.TINYINT: "INT64", 1065 exp.DataType.Type.ROWVERSION: "BYTES", 1066 exp.DataType.Type.UUID: "STRING", 1067 exp.DataType.Type.VARBINARY: "BYTES", 1068 exp.DataType.Type.VARCHAR: "STRING", 1069 exp.DataType.Type.VARIANT: "ANY TYPE", 1070 } 1071 1072 PROPERTIES_LOCATION = { 1073 **generator.Generator.PROPERTIES_LOCATION, 1074 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1075 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1076 } 1077 1078 # WINDOW comes after QUALIFY 1079 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 
1080 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1081 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1082 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1083 } 1084 1085 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1086 RESERVED_KEYWORDS = { 1087 "all", 1088 "and", 1089 "any", 1090 "array", 1091 "as", 1092 "asc", 1093 "assert_rows_modified", 1094 "at", 1095 "between", 1096 "by", 1097 "case", 1098 "cast", 1099 "collate", 1100 "contains", 1101 "create", 1102 "cross", 1103 "cube", 1104 "current", 1105 "default", 1106 "define", 1107 "desc", 1108 "distinct", 1109 "else", 1110 "end", 1111 "enum", 1112 "escape", 1113 "except", 1114 "exclude", 1115 "exists", 1116 "extract", 1117 "false", 1118 "fetch", 1119 "following", 1120 "for", 1121 "from", 1122 "full", 1123 "group", 1124 "grouping", 1125 "groups", 1126 "hash", 1127 "having", 1128 "if", 1129 "ignore", 1130 "in", 1131 "inner", 1132 "intersect", 1133 "interval", 1134 "into", 1135 "is", 1136 "join", 1137 "lateral", 1138 "left", 1139 "like", 1140 "limit", 1141 "lookup", 1142 "merge", 1143 "natural", 1144 "new", 1145 "no", 1146 "not", 1147 "null", 1148 "nulls", 1149 "of", 1150 "on", 1151 "or", 1152 "order", 1153 "outer", 1154 "over", 1155 "partition", 1156 "preceding", 1157 "proto", 1158 "qualify", 1159 "range", 1160 "recursive", 1161 "respect", 1162 "right", 1163 "rollup", 1164 "rows", 1165 "select", 1166 "set", 1167 "some", 1168 "struct", 1169 "tablesample", 1170 "then", 1171 "to", 1172 "treat", 1173 "true", 1174 "unbounded", 1175 "union", 1176 "unnest", 1177 "using", 1178 "when", 1179 "where", 1180 "window", 1181 "with", 1182 "within", 1183 } 1184 1185 def mod_sql(self, expression: exp.Mod) -> str: 1186 this = expression.this 1187 expr = expression.expression 1188 return self.func( 1189 "MOD", 1190 this.unnest() if isinstance(this, exp.Paren) else this, 1191 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1192 ) 1193 1194 def column_parts(self, expression: exp.Column) -> str: 1195 if expression.meta.get("quoted_column"): 1196 # If a column reference is of the form `dataset.table`.name, we need 1197 # to preserve the quoted table path, otherwise the reference breaks 1198 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1199 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1200 return f"{table_path}.{self.sql(expression, 'this')}" 1201 1202 return super().column_parts(expression) 1203 1204 def table_parts(self, expression: exp.Table) -> str: 1205 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1206 # we need to make sure the correct quoting is used in each case. 
1207 # 1208 # For example, if there is a CTE x that clashes with a schema name, then the former will 1209 # return the table y in that schema, whereas the latter will return the CTE's y column: 1210 # 1211 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1212 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1213 if expression.meta.get("quoted_table"): 1214 table_parts = ".".join(p.name for p in expression.parts) 1215 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1216 1217 return super().table_parts(expression) 1218 1219 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1220 this = expression.this 1221 if isinstance(this, exp.TsOrDsToDatetime): 1222 func_name = "FORMAT_DATETIME" 1223 elif isinstance(this, exp.TsOrDsToTimestamp): 1224 func_name = "FORMAT_TIMESTAMP" 1225 else: 1226 func_name = "FORMAT_DATE" 1227 1228 time_expr = ( 1229 this 1230 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1231 else expression 1232 ) 1233 return self.func( 1234 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1235 ) 1236 1237 def eq_sql(self, expression: exp.EQ) -> str: 1238 # Operands of = cannot be NULL in BigQuery 1239 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1240 if not isinstance(expression.parent, exp.Update): 1241 return "NULL" 1242 1243 return self.binary(expression, "=") 1244 1245 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1246 parent = expression.parent 1247 1248 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1249 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1250 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1251 return self.func( 1252 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1253 ) 1254 1255 return super().attimezone_sql(expression) 1256 1257 def trycast_sql(self, expression: exp.TryCast) -> str: 1258 return self.cast_sql(expression, safe_prefix="SAFE_") 1259 1260 def bracket_sql(self, expression: exp.Bracket) -> str: 1261 this = expression.this 1262 expressions = expression.expressions 1263 1264 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1265 arg = expressions[0] 1266 if arg.type is None: 1267 from sqlglot.optimizer.annotate_types import annotate_types 1268 1269 arg = annotate_types(arg, dialect=self.dialect) 1270 1271 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1272 # BQ doesn't support bracket syntax with string values for structs 1273 return f"{self.sql(this)}.{arg.name}" 1274 1275 expressions_sql = self.expressions(expression, flat=True) 1276 offset = expression.args.get("offset") 1277 1278 if offset == 0: 1279 expressions_sql = f"OFFSET({expressions_sql})" 1280 elif offset == 1: 1281 expressions_sql = f"ORDINAL({expressions_sql})" 1282 elif offset is not None: 1283 self.unsupported(f"Unsupported array offset: {offset}") 1284 1285 if expression.args.get("safe"): 1286 expressions_sql = f"SAFE_{expressions_sql}" 1287 1288 return f"{self.sql(this)}[{expressions_sql}]" 1289 1290 def in_unnest_op(self, expression: exp.Unnest) -> str: 1291 return self.sql(expression) 1292 1293 def version_sql(self, expression: exp.Version) -> str: 1294 if expression.name == "TIMESTAMP": 1295 expression.set("this", "SYSTEM_TIME") 1296 return super().version_sql(expression) 1297 1298 def contains_sql(self, 
expression: exp.Contains) -> str: 1299 this = expression.this 1300 expr = expression.expression 1301 1302 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1303 this = this.this 1304 expr = expr.this 1305 1306 return self.func("CONTAINS_SUBSTR", this, expr) 1307 1308 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1309 this = expression.this 1310 1311 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1312 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1313 # because they aren't literals and so the above syntax is invalid BigQuery. 1314 if isinstance(this, exp.Array): 1315 elem = seq_get(this.expressions, 0) 1316 if not (elem and elem.find(exp.Query)): 1317 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1318 1319 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
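The options above are forwarded through sqlglot's top-level entry points, and the BigQuery-specific overrides in the source listing earlier are easiest to see through small examples, given per method below. First, a hedged usage sketch of the generator options themselves (the query and identifier names are invented; the formatted output described in the comment is indicative rather than verified):

import sqlglot

# pretty and identify are Generator options that transpile() forwards along;
# with identify=True, BigQuery quotes every identifier with backticks.
sql = "select col_a, col_b from dataset.tbl where col_a > 1"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery", pretty=True, identify=True)[0])
# Expected shape: a formatted, multi-line statement with all identifiers backtick-quoted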
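column_parts keeps a column's quoted multi-part table path intact instead of splitting it into individual identifiers, since splitting would break the reference. A hedged roundtrip sketch (the project, dataset, and table names are invented):

import sqlglot

# The backtick-quoted table path before .col should survive generation as a
# single identifier, per the quoted_column meta flag set at parse time.
sql = "SELECT `my-project.dataset.t`.col FROM `my-project.dataset.t`"
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))
# Expected to roundtrip unchanged (an assumption, not verified here)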
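table_parts makes the same distinction for table references. Reusing the CTE-clash example from the source comment, both spellings should print back exactly as written:

import sqlglot

# `x.y` is one quoted identifier (the table y in schema x), while `x`.`y` is
# the CTE x followed by its column y, so the quoting must be preserved.
for sql in (
    "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`",
    "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`",
):
    print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))
# Both statements are expected to roundtrip unchanged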
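timetostr_sql chooses FORMAT_DATE, FORMAT_DATETIME, or FORMAT_TIMESTAMP based on the type of the value being formatted, passing the time zone through where FORMAT_TIMESTAMP accepts it. A hedged roundtrip sketch (d and ts are invented column names):

import sqlglot

# The FORMAT_* family should roundtrip through the BigQuery dialect.
for sql in (
    "SELECT FORMAT_DATE('%Y-%m-%d', d)",
    "SELECT FORMAT_TIMESTAMP('%Y-%m-%d %H:%M:%S', ts, 'UTC')",
):
    print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))
# Expected to print both statements back essentially unchanged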
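eq_sql reflects that comparing against NULL with = never yields TRUE, so sqlglot folds the whole comparison to NULL, except inside UPDATE ... SET, where = is an assignment rather than a comparison. A sketch of the expected behavior (a and t are invented names):

import sqlglot

# A NULL operand collapses the comparison itself to NULL...
print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])
# Expected: SELECT NULL

# ...but assignments in an UPDATE are left alone.
print(sqlglot.transpile("UPDATE t SET a = NULL", write="bigquery")[0])
# Expected: UPDATE t SET a = NULL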
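attimezone_sql rewrites AT TIME ZONE, which BigQuery does not support as a standalone operator, into nested DATETIME/TIMESTAMP calls, leaving only the CAST ... FORMAT ... AT TIME ZONE form to the default handling. A hedged transpilation sketch (ts is an invented column):

import sqlglot

# Postgres' AT TIME ZONE becomes a DATETIME in the target zone, reinterpreted
# as a TIMESTAMP.
sql = "SELECT ts AT TIME ZONE 'America/New_York'"
print(sqlglot.transpile(sql, read="postgres", write="bigquery")[0])
# Expected: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))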
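bracket_sql enforces BigQuery's requirement that array subscripts be wrapped in OFFSET (0-based), ORDINAL (1-based), or a SAFE_ variant that returns NULL out of bounds, and turns string subscripts on structs into dot access. A hedged roundtrip sketch (arr and t are invented names):

import sqlglot

# All three subscript wrappers parse into exp.Bracket and should print back.
sql = "SELECT arr[OFFSET(0)], arr[ORDINAL(1)], arr[SAFE_OFFSET(9)] FROM t"
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))
# Expected to roundtrip unchanged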
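cast_sql preserves inline type-annotated array literals, and trycast_sql reuses it with safe_prefix="SAFE_" to emit SAFE_CAST. A hedged sketch (the INT -> INT64 type mapping is the dialect default, assumed rather than verified here):

import sqlglot

# The typed array literal keeps its inline annotation instead of becoming a CAST.
print(sqlglot.parse_one("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery").sql(dialect="bigquery"))
# Expected: SELECT ARRAY<INT64>[1, 2, 3]

# TRY_CAST from another dialect maps onto BigQuery's SAFE_CAST.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="duckdb", write="bigquery")[0])
# Expected: SELECT SAFE_CAST(x AS INT64)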
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql