sqlglot.dialects.bigquery
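This module implements sqlglot's BigQuery dialect: the Tokenizer, Parser, and Generator defined below translate between BigQuery SQL and sqlglot's dialect-agnostic expression tree. Before the source, a minimal usage sketch (not part of the module; it relies only on sqlglot's public transpile and parse_one entry points, and exact output can vary across sqlglot versions):

import sqlglot

# TIMESTAMP_MILLIS(...) is parsed into the canonical exp.UnixToTime node (see the
# Parser's FUNCTIONS mapping below) and re-rendered for the target dialect.
print(sqlglot.transpile("SELECT TIMESTAMP_MILLIS(1388534400000)", read="bigquery", write="duckdb")[0])

# Dash-separated table names are BigQuery-specific; _parse_table_part below glues
# the dashed tokens back together into a single identifier.
ast = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
print(ast.sql(dialect="bigquery"))

Module source: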
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    str_position_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(this=expr_type(this=seq_get(args, 1)), format=seq_get(args, 0))

    return _builder


def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTRING", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True
    PRESERVE_ORIGINAL_NAMES = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTRING": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                table.set("this", exp.Identifier(this=info_schema_view, quoted=True))
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func(
                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
            ),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: str_position_sql,
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), time_expr.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)

        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTRING", this, expr)
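Two generator-side behaviors from the source above, sketched with only public sqlglot calls (outputs are illustrative, not guaranteed verbatim across versions):

import sqlglot

# BigQuery lacks a derived-table VALUES clause, so _derived_table_values_to_unnest
# rewrites it into UNNEST of an array of structs whose fields carry the column aliases.
sql = "SELECT a, b FROM (VALUES (1, 'x'), (2, 'y')) AS t(a, b)"
print(sqlglot.transpile(sql, read="postgres", write="bigquery")[0])

# Operands of = cannot be NULL in BigQuery, so eq_sql folds the comparison to NULL.
print(sqlglot.transpile("SELECT col = NULL", write="bigquery")[0])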
1007 "and", 1008 "any", 1009 "array", 1010 "as", 1011 "asc", 1012 "assert_rows_modified", 1013 "at", 1014 "between", 1015 "by", 1016 "case", 1017 "cast", 1018 "collate", 1019 "contains", 1020 "create", 1021 "cross", 1022 "cube", 1023 "current", 1024 "default", 1025 "define", 1026 "desc", 1027 "distinct", 1028 "else", 1029 "end", 1030 "enum", 1031 "escape", 1032 "except", 1033 "exclude", 1034 "exists", 1035 "extract", 1036 "false", 1037 "fetch", 1038 "following", 1039 "for", 1040 "from", 1041 "full", 1042 "group", 1043 "grouping", 1044 "groups", 1045 "hash", 1046 "having", 1047 "if", 1048 "ignore", 1049 "in", 1050 "inner", 1051 "intersect", 1052 "interval", 1053 "into", 1054 "is", 1055 "join", 1056 "lateral", 1057 "left", 1058 "like", 1059 "limit", 1060 "lookup", 1061 "merge", 1062 "natural", 1063 "new", 1064 "no", 1065 "not", 1066 "null", 1067 "nulls", 1068 "of", 1069 "on", 1070 "or", 1071 "order", 1072 "outer", 1073 "over", 1074 "partition", 1075 "preceding", 1076 "proto", 1077 "qualify", 1078 "range", 1079 "recursive", 1080 "respect", 1081 "right", 1082 "rollup", 1083 "rows", 1084 "select", 1085 "set", 1086 "some", 1087 "struct", 1088 "tablesample", 1089 "then", 1090 "to", 1091 "treat", 1092 "true", 1093 "unbounded", 1094 "union", 1095 "unnest", 1096 "using", 1097 "when", 1098 "where", 1099 "window", 1100 "with", 1101 "within", 1102 } 1103 1104 def mod_sql(self, expression: exp.Mod) -> str: 1105 this = expression.this 1106 expr = expression.expression 1107 return self.func( 1108 "MOD", 1109 this.unnest() if isinstance(this, exp.Paren) else this, 1110 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1111 ) 1112 1113 def column_parts(self, expression: exp.Column) -> str: 1114 if expression.meta.get("quoted_column"): 1115 # If a column reference is of the form `dataset.table`.name, we need 1116 # to preserve the quoted table path, otherwise the reference breaks 1117 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1118 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1119 return f"{table_path}.{self.sql(expression, 'this')}" 1120 1121 return super().column_parts(expression) 1122 1123 def table_parts(self, expression: exp.Table) -> str: 1124 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1125 # we need to make sure the correct quoting is used in each case. 
1126 # 1127 # For example, if there is a CTE x that clashes with a schema name, then the former will 1128 # return the table y in that schema, whereas the latter will return the CTE's y column: 1129 # 1130 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1131 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1132 if expression.meta.get("quoted_table"): 1133 table_parts = ".".join(p.name for p in expression.parts) 1134 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1135 1136 return super().table_parts(expression) 1137 1138 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1139 this = expression.this 1140 if isinstance(this, exp.TsOrDsToDatetime): 1141 func_name = "FORMAT_DATETIME" 1142 elif isinstance(this, exp.TsOrDsToTimestamp): 1143 func_name = "FORMAT_TIMESTAMP" 1144 else: 1145 func_name = "FORMAT_DATE" 1146 1147 time_expr = ( 1148 this 1149 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1150 else expression 1151 ) 1152 return self.func(func_name, self.format_time(expression), time_expr.this) 1153 1154 def eq_sql(self, expression: exp.EQ) -> str: 1155 # Operands of = cannot be NULL in BigQuery 1156 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1157 if not isinstance(expression.parent, exp.Update): 1158 return "NULL" 1159 1160 return self.binary(expression, "=") 1161 1162 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1163 parent = expression.parent 1164 1165 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1166 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1167 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1168 return self.func( 1169 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1170 ) 1171 1172 return super().attimezone_sql(expression) 1173 1174 def trycast_sql(self, expression: exp.TryCast) -> str: 1175 return self.cast_sql(expression, safe_prefix="SAFE_") 1176 1177 def bracket_sql(self, expression: exp.Bracket) -> str: 1178 this = expression.this 1179 expressions = expression.expressions 1180 1181 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1182 arg = expressions[0] 1183 if arg.type is None: 1184 from sqlglot.optimizer.annotate_types import annotate_types 1185 1186 arg = annotate_types(arg) 1187 1188 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1189 # BQ doesn't support bracket syntax with string values for structs 1190 return f"{self.sql(this)}.{arg.name}" 1191 1192 expressions_sql = self.expressions(expression, flat=True) 1193 offset = expression.args.get("offset") 1194 1195 if offset == 0: 1196 expressions_sql = f"OFFSET({expressions_sql})" 1197 elif offset == 1: 1198 expressions_sql = f"ORDINAL({expressions_sql})" 1199 elif offset is not None: 1200 self.unsupported(f"Unsupported array offset: {offset}") 1201 1202 if expression.args.get("safe"): 1203 expressions_sql = f"SAFE_{expressions_sql}" 1204 1205 return f"{self.sql(this)}[{expressions_sql}]" 1206 1207 def in_unnest_op(self, expression: exp.Unnest) -> str: 1208 return self.sql(expression) 1209 1210 def version_sql(self, expression: exp.Version) -> str: 1211 if expression.name == "TIMESTAMP": 1212 expression.set("this", "SYSTEM_TIME") 1213 return super().version_sql(expression) 1214 1215 def contains_sql(self, expression: exp.Contains) -> str: 1216 this = expression.this 1217 expr 
= expression.expression 1218 1219 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1220 this = this.this 1221 expr = expr.this 1222 1223 return self.func("CONTAINS_SUBSTRING", this, expr)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, or None (the two-argument form of LOG is not supported).
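To illustrate (a minimal sketch, not part of the module source): BigQuery writes the base as the second argument of LOG, so transpiling a two-argument LOG from a base-first dialect such as Postgres reorders the operands.

import sqlglot

# Postgres spells it LOG(base, value); BigQuery spells it LOG(value, base).
# sqlglot consults each dialect's LOG_BASE_FIRST setting to reorder arguments.
print(sqlglot.transpile("SELECT LOG(2, 1024)", read="postgres", write="bigquery")[0])
# expected output along the lines of: SELECT LOG(1024, 2)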
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
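A small sketch of the resolution difference described above, using the optimizer's qualify pass (the exact qualified output may vary by sqlglot version):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# Under BigQuery's rules the projection alias is forwarded to GROUP BY and
# HAVING, while the WHERE clause still resolves my_id to the CTE column.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))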
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated and new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
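For example (illustrative, and relying on this flag being enabled for BigQuery): JSON_VALUE and JSON_EXTRACT_SCALAR share the exp.JSONExtractScalar node, so the original spelling is remembered in the node's metadata and reused on generation.

import sqlglot

e = sqlglot.parse_one("SELECT JSON_VALUE(data, '$.name')", read="bigquery")
# Both spellings parse into the same AST node; the metadata records which
# one was used so the round trip does not canonicalize it away.
print(type(e.find(sqlglot.exp.JSONExtractScalar)).__name__)
print(e.sql("bigquery"))  # should still say JSON_VALUE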
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
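For instance (a sketch; the exact target rendering depends on the other dialect), the TIME_MAPPING table is what lets format strings survive transpilation:

import sqlglot

# BigQuery format elements are rewritten through TIME_MAPPING so that the
# equivalent strptime-style format string reaches the target dialect.
print(sqlglot.transpile(
    "SELECT PARSE_TIMESTAMP('%Y-%m-%d', '2024-01-01')",
    read="bigquery",
    write="duckdb",
)[0])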
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
423 def normalize_identifier(self, expression: E) -> E: 424 if ( 425 isinstance(expression, exp.Identifier) 426 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 427 ): 428 parent = expression.parent 429 while isinstance(parent, exp.Dot): 430 parent = parent.parent 431 432 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 433 # by default. The following check uses a heuristic to detect tables based on whether 434 # they are qualified. This should generally be correct, because tables in BigQuery 435 # must be qualified with at least a dataset, unless @@dataset_id is set. 436 case_sensitive = ( 437 isinstance(parent, exp.UserDefinedFunction) 438 or ( 439 isinstance(parent, exp.Table) 440 and parent.db 441 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 442 ) 443 or expression.meta.get("is_table") 444 ) 445 if not case_sensitive: 446 expression.set("this", expression.this.lower()) 447 448 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
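A minimal sketch against the BigQuery dialect (an unquoted, unqualified identifier hits the case-insensitive path above):

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

# A bare identifier falls through the table/UDF heuristic and is lowercased.
print(dialect.normalize_identifier(exp.to_identifier("FoO")).name)  # foo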
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
450 class Tokenizer(tokens.Tokenizer): 451 QUOTES = ["'", '"', '"""', "'''"] 452 COMMENTS = ["--", "#", ("/*", "*/")] 453 IDENTIFIERS = ["`"] 454 STRING_ESCAPES = ["\\"] 455 456 HEX_STRINGS = [("0x", ""), ("0X", "")] 457 458 BYTE_STRINGS = [ 459 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 460 ] 461 462 RAW_STRINGS = [ 463 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 464 ] 465 466 KEYWORDS = { 467 **tokens.Tokenizer.KEYWORDS, 468 "ANY TYPE": TokenType.VARIANT, 469 "BEGIN": TokenType.COMMAND, 470 "BEGIN TRANSACTION": TokenType.BEGIN, 471 "BYTEINT": TokenType.INT, 472 "BYTES": TokenType.BINARY, 473 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 474 "DATETIME": TokenType.TIMESTAMP, 475 "DECLARE": TokenType.COMMAND, 476 "ELSEIF": TokenType.COMMAND, 477 "EXCEPTION": TokenType.COMMAND, 478 "FLOAT64": TokenType.DOUBLE, 479 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 480 "MODEL": TokenType.MODEL, 481 "NOT DETERMINISTIC": TokenType.VOLATILE, 482 "RECORD": TokenType.STRUCT, 483 "TIMESTAMP": TokenType.TIMESTAMPTZ, 484 } 485 KEYWORDS.pop("DIV") 486 KEYWORDS.pop("VALUES") 487 KEYWORDS.pop("/*+")
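A short sketch of these tokenizer settings in action (the exact token stream may differ slightly across sqlglot versions):

import sqlglot

# Backticks delimit identifiers, '#' opens a comment, and the r/b prefixes
# produce raw and byte string literals per the QUOTES/RAW_STRINGS/BYTE_STRINGS
# definitions above.
tokens = sqlglot.tokenize(
    "SELECT r'\\d+', b'abc' FROM `proj.dataset.tbl`  # comment",
    read="bigquery",
)
for token in tokens:
    print(token.token_type, repr(token.text))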
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
489 class Parser(parser.Parser): 490 PREFIXED_PIVOT_COLUMNS = True 491 LOG_DEFAULTS_TO_LN = True 492 SUPPORTS_IMPLICIT_UNNEST = True 493 494 FUNCTIONS = { 495 **parser.Parser.FUNCTIONS, 496 "CONTAINS_SUBSTRING": _build_contains_substring, 497 "DATE": _build_date, 498 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 499 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 500 "DATE_TRUNC": lambda args: exp.DateTrunc( 501 unit=exp.Literal.string(str(seq_get(args, 1))), 502 this=seq_get(args, 0), 503 zone=seq_get(args, 2), 504 ), 505 "DATETIME": _build_datetime, 506 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 507 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 508 "DIV": binary_from_function(exp.IntDiv), 509 "EDIT_DISTANCE": _build_levenshtein, 510 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 511 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 512 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 513 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 514 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 515 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 516 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 517 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 518 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 519 "MD5": exp.MD5Digest.from_arg_list, 520 "TO_HEX": _build_to_hex, 521 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 522 [seq_get(args, 1), seq_get(args, 0)] 523 ), 524 "PARSE_TIMESTAMP": _build_parse_timestamp, 525 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 526 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 527 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 528 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 529 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 530 ), 531 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 532 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 533 "SPLIT": lambda args: exp.Split( 534 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 535 this=seq_get(args, 0), 536 expression=seq_get(args, 1) or exp.Literal.string(","), 537 ), 538 "STRPOS": exp.StrPosition.from_arg_list, 539 "TIME": _build_time, 540 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 541 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 542 "TIMESTAMP": _build_timestamp, 543 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 544 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 545 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 546 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 547 ), 548 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 549 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 550 ), 551 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 552 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 553 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 554 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 555 } 556 557 FUNCTION_PARSERS = { 558 **parser.Parser.FUNCTION_PARSERS, 559 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 560 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 561 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 562 } 563 FUNCTION_PARSERS.pop("TRIM") 564 565 NO_PAREN_FUNCTIONS = { 566 **parser.Parser.NO_PAREN_FUNCTIONS, 567 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 568 } 569 570 NESTED_TYPE_TOKENS = { 571 *parser.Parser.NESTED_TYPE_TOKENS, 572 TokenType.TABLE, 573 } 574 575 PROPERTY_PARSERS = { 576 **parser.Parser.PROPERTY_PARSERS, 577 "NOT DETERMINISTIC": lambda self: self.expression( 578 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 579 ), 580 "OPTIONS": lambda self: self._parse_with_property(), 581 } 582 583 CONSTRAINT_PARSERS = { 584 **parser.Parser.CONSTRAINT_PARSERS, 585 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 586 } 587 588 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 589 RANGE_PARSERS.pop(TokenType.OVERLAPS) 590 591 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 592 593 STATEMENT_PARSERS = { 594 **parser.Parser.STATEMENT_PARSERS, 595 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 596 TokenType.END: lambda self: self._parse_as_command(self._prev), 597 TokenType.FOR: lambda self: self._parse_for_in(), 598 } 599 600 BRACKET_OFFSETS = { 601 "OFFSET": (0, False), 602 "ORDINAL": (1, False), 603 "SAFE_OFFSET": (0, True), 604 "SAFE_ORDINAL": (1, True), 605 } 606 607 def _parse_for_in(self) -> exp.ForIn: 608 this = self._parse_range() 609 self._match_text_seq("DO") 610 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 611 612 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 613 this = super()._parse_table_part(schema=schema) or self._parse_number() 614 615 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 616 if isinstance(this, exp.Identifier): 617 table_name = this.name 618 while self._match(TokenType.DASH, advance=False) and self._next: 619 text = "" 620 while self._is_connected() and self._curr.token_type != TokenType.DOT: 621 self._advance() 622 text += self._prev.text 623 table_name += text 624 625 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 626 elif isinstance(this, exp.Literal): 627 table_name = this.name 628 629 if self._is_connected() and self._parse_var(any_token=True): 630 table_name += self._prev.text 631 632 this = exp.Identifier(this=table_name, quoted=True) 633 634 return this 635 636 def _parse_table_parts( 637 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 638 ) -> exp.Table: 639 table = super()._parse_table_parts( 640 schema=schema, is_db_reference=is_db_reference, wildcard=True 641 ) 642 643 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 644 if not table.catalog: 645 if table.db: 646 parts = table.db.split(".") 647 if len(parts) == 2 and not table.args["db"].quoted: 648 table.set("catalog", exp.Identifier(this=parts[0])) 649 table.set("db", exp.Identifier(this=parts[1])) 650 else: 651 parts = table.name.split(".") 652 if len(parts) == 2 and not table.this.quoted: 653 table.set("db", exp.Identifier(this=parts[0])) 654 table.set("this", exp.Identifier(this=parts[1])) 655 656 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 657 alias = table.this 658 catalog, db, this, *rest = ( 659 exp.to_identifier(p, quoted=True) 660 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 661 ) 662 663 if rest and this: 664 this = exp.Dot.build([this, *rest]) # type: ignore 665 666 table = exp.Table( 667 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 668 ) 669 table.meta["quoted_table"] = True 670 else: 671 alias = None 672 673 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 674 # dataset, so if the project identifier is omitted we need to fix the ast so that 675 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 676 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 677 # views, because it would seem like the "catalog" part is set, when it'd actually 678 # be the region/dataset. Merging the two identifiers into a single one is done to 679 # avoid producing a 4-part Table reference, which would cause issues in the schema 680 # module, when there are 3-part table names mixed with information schema views. 681 # 682 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 683 table_parts = table.parts 684 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 685 # We need to alias the table here to avoid breaking existing qualified columns. 686 # This is expected to be safe, because if there's an actual alias coming up in 687 # the token stream, it will overwrite this one. If there isn't one, we are only 688 # exposing the name that can be used to reference the view explicitly (a no-op). 689 exp.alias_( 690 table, 691 t.cast(exp.Identifier, alias or table_parts[-1]), 692 table=True, 693 copy=False, 694 ) 695 696 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 697 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 698 table.set("db", seq_get(table_parts, -3)) 699 table.set("catalog", seq_get(table_parts, -4)) 700 701 return table 702 703 def _parse_column(self) -> t.Optional[exp.Expression]: 704 column = super()._parse_column() 705 if isinstance(column, exp.Column): 706 parts = column.parts 707 if any("." in p.name for p in parts): 708 catalog, db, table, this, *rest = ( 709 exp.to_identifier(p, quoted=True) 710 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 711 ) 712 713 if rest and this: 714 this = exp.Dot.build([this, *rest]) # type: ignore 715 716 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 717 column.meta["quoted_column"] = True 718 719 return column 720 721 @t.overload 722 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 723 724 @t.overload 725 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
726 727 def _parse_json_object(self, agg=False): 728 json_object = super()._parse_json_object() 729 array_kv_pair = seq_get(json_object.expressions, 0) 730 731 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 732 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 733 if ( 734 array_kv_pair 735 and isinstance(array_kv_pair.this, exp.Array) 736 and isinstance(array_kv_pair.expression, exp.Array) 737 ): 738 keys = array_kv_pair.this.expressions 739 values = array_kv_pair.expression.expressions 740 741 json_object.set( 742 "expressions", 743 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 744 ) 745 746 return json_object 747 748 def _parse_bracket( 749 self, this: t.Optional[exp.Expression] = None 750 ) -> t.Optional[exp.Expression]: 751 bracket = super()._parse_bracket(this) 752 753 if this is bracket: 754 return bracket 755 756 if isinstance(bracket, exp.Bracket): 757 for expression in bracket.expressions: 758 name = expression.name.upper() 759 760 if name not in self.BRACKET_OFFSETS: 761 break 762 763 offset, safe = self.BRACKET_OFFSETS[name] 764 bracket.set("offset", offset) 765 bracket.set("safe", safe) 766 expression.replace(expression.expressions[0]) 767 768 return bracket 769 770 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 771 unnest = super()._parse_unnest(with_alias=with_alias) 772 773 if not unnest: 774 return None 775 776 unnest_expr = seq_get(unnest.expressions, 0) 777 if unnest_expr: 778 from sqlglot.optimizer.annotate_types import annotate_types 779 780 unnest_expr = annotate_types(unnest_expr) 781 782 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 783 # in contrast to other dialects such as DuckDB which flattens only the array by default 784 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 785 array_elem.is_type(exp.DataType.Type.STRUCT) 786 for array_elem in unnest_expr._type.expressions 787 ): 788 unnest.set("explode_array", True) 789 790 return unnest 791 792 def _parse_make_interval(self) -> exp.MakeInterval: 793 expr = exp.MakeInterval() 794 795 for arg_key in expr.arg_types: 796 value = self._parse_lambda() 797 798 if not value: 799 break 800 801 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 802 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 803 if isinstance(value, exp.Kwarg): 804 arg_key = value.this.name 805 806 expr.set(arg_key, value) 807 808 self._match(TokenType.COMMA) 809 810 return expr 811 812 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 813 expr = self.expression( 814 exp.FeaturesAtTime, 815 this=(self._match(TokenType.TABLE) and self._parse_table()) 816 or self._parse_select(nested=True), 817 ) 818 819 while self._match(TokenType.COMMA): 820 arg = self._parse_lambda() 821 822 # Get the LHS of the Kwarg and set the arg to that value, e.g 823 # "num_rows => 1" sets the expr's `num_rows` arg 824 if arg: 825 expr.set(arg.this.name, arg) 826 827 return expr
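To see a few of these parser overrides together (a sketch; the AST inspection is abbreviated): dash-separated project names and SAFE_OFFSET subscripts are BigQuery-specific parses handled by _parse_table_part and _parse_bracket above.

import sqlglot
from sqlglot import exp

e = sqlglot.parse_one(
    "SELECT arr[SAFE_OFFSET(0)] FROM my-project.dataset.tbl",
    read="bigquery",
)

# SAFE_OFFSET is folded into the Bracket node as offset/safe args.
bracket = e.find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True
print(e.sql("bigquery"))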
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
829 class Generator(generator.Generator): 830 INTERVAL_ALLOWS_PLURAL_FORM = False 831 JOIN_HINTS = False 832 QUERY_HINTS = False 833 TABLE_HINTS = False 834 LIMIT_FETCH = "LIMIT" 835 RENAME_TABLE_WITH_DB = False 836 NVL2_SUPPORTED = False 837 UNNEST_WITH_ORDINALITY = False 838 COLLATE_IS_FUNC = True 839 LIMIT_ONLY_LITERALS = True 840 SUPPORTS_TABLE_ALIAS_COLUMNS = False 841 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 842 JSON_KEY_VALUE_PAIR_SEP = "," 843 NULL_ORDERING_SUPPORTED = False 844 IGNORE_NULLS_IN_FUNC = True 845 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 846 CAN_IMPLEMENT_ARRAY_ANY = True 847 SUPPORTS_TO_NUMBER = False 848 NAMED_PLACEHOLDER_TOKEN = "@" 849 HEX_FUNC = "TO_HEX" 850 WITH_PROPERTIES_PREFIX = "OPTIONS" 851 SUPPORTS_EXPLODING_PROJECTIONS = False 852 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 853 SUPPORTS_UNIX_SECONDS = True 854 855 TRANSFORMS = { 856 **generator.Generator.TRANSFORMS, 857 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 858 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 859 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 860 exp.Array: inline_array_unless_query, 861 exp.ArrayContains: _array_contains_sql, 862 exp.ArrayFilter: filter_array_using_unnest, 863 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 864 exp.CollateProperty: lambda self, e: ( 865 f"DEFAULT COLLATE {self.sql(e, 'this')}" 866 if e.args.get("default") 867 else f"COLLATE {self.sql(e, 'this')}" 868 ), 869 exp.Commit: lambda *_: "COMMIT TRANSACTION", 870 exp.CountIf: rename_func("COUNTIF"), 871 exp.Create: _create_sql, 872 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 873 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 874 exp.DateDiff: lambda self, e: self.func( 875 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 876 ), 877 exp.DateFromParts: rename_func("DATE"), 878 exp.DateStrToDate: datestrtodate_sql, 879 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 880 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 881 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 882 exp.DateTrunc: lambda self, e: self.func( 883 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 884 ), 885 exp.FromTimeZone: lambda self, e: self.func( 886 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 887 ), 888 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 889 exp.GroupConcat: rename_func("STRING_AGG"), 890 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 891 exp.If: if_sql(false_value="NULL"), 892 exp.ILike: no_ilike_sql, 893 exp.IntDiv: rename_func("DIV"), 894 exp.Int64: rename_func("INT64"), 895 exp.JSONExtract: _json_extract_sql, 896 exp.JSONExtractArray: _json_extract_sql, 897 exp.JSONExtractScalar: _json_extract_sql, 898 exp.JSONFormat: rename_func("TO_JSON_STRING"), 899 exp.Levenshtein: _levenshtein_sql, 900 exp.Max: max_or_greatest, 901 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 902 exp.MD5Digest: rename_func("MD5"), 903 exp.Min: min_or_least, 904 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 905 exp.RegexpExtract: lambda self, e: self.func( 906 "REGEXP_EXTRACT", 907 e.this, 908 e.expression, 909 e.args.get("position"), 910 e.args.get("occurrence"), 911 ), 912 exp.RegexpExtractAll: lambda self, e: self.func( 913 "REGEXP_EXTRACT_ALL", e.this, e.expression 914 ), 915 exp.RegexpReplace: regexp_replace_sql, 916 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 917 exp.ReturnsProperty: _returnsproperty_sql, 918 
exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 919 exp.Select: transforms.preprocess( 920 [ 921 transforms.explode_to_unnest(), 922 transforms.unqualify_unnest, 923 transforms.eliminate_distinct_on, 924 _alias_ordered_group, 925 transforms.eliminate_semi_and_anti_joins, 926 ] 927 ), 928 exp.SHA: rename_func("SHA1"), 929 exp.SHA2: sha256_sql, 930 exp.StabilityProperty: lambda self, e: ( 931 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 932 ), 933 exp.String: rename_func("STRING"), 934 exp.StrPosition: str_position_sql, 935 exp.StrToDate: _str_to_datetime_sql, 936 exp.StrToTime: _str_to_datetime_sql, 937 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 938 exp.TimeFromParts: rename_func("TIME"), 939 exp.TimestampFromParts: rename_func("DATETIME"), 940 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 941 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 942 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 943 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 944 exp.TimeStrToTime: timestrtotime_sql, 945 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 946 exp.TsOrDsAdd: _ts_or_ds_add_sql, 947 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 948 exp.TsOrDsToTime: rename_func("TIME"), 949 exp.TsOrDsToDatetime: rename_func("DATETIME"), 950 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 951 exp.Unhex: rename_func("FROM_HEX"), 952 exp.UnixDate: rename_func("UNIX_DATE"), 953 exp.UnixToTime: _unix_to_time_sql, 954 exp.Uuid: lambda *_: "GENERATE_UUID()", 955 exp.Values: _derived_table_values_to_unnest, 956 exp.VariancePop: rename_func("VAR_POP"), 957 } 958 959 SUPPORTED_JSON_PATH_PARTS = { 960 exp.JSONPathKey, 961 exp.JSONPathRoot, 962 exp.JSONPathSubscript, 963 } 964 965 TYPE_MAPPING = { 966 **generator.Generator.TYPE_MAPPING, 967 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 968 exp.DataType.Type.BIGINT: "INT64", 969 exp.DataType.Type.BINARY: "BYTES", 970 exp.DataType.Type.BOOLEAN: "BOOL", 971 exp.DataType.Type.CHAR: "STRING", 972 exp.DataType.Type.DECIMAL: "NUMERIC", 973 exp.DataType.Type.DOUBLE: "FLOAT64", 974 exp.DataType.Type.FLOAT: "FLOAT64", 975 exp.DataType.Type.INT: "INT64", 976 exp.DataType.Type.NCHAR: "STRING", 977 exp.DataType.Type.NVARCHAR: "STRING", 978 exp.DataType.Type.SMALLINT: "INT64", 979 exp.DataType.Type.TEXT: "STRING", 980 exp.DataType.Type.TIMESTAMP: "DATETIME", 981 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 982 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 983 exp.DataType.Type.TINYINT: "INT64", 984 exp.DataType.Type.ROWVERSION: "BYTES", 985 exp.DataType.Type.UUID: "STRING", 986 exp.DataType.Type.VARBINARY: "BYTES", 987 exp.DataType.Type.VARCHAR: "STRING", 988 exp.DataType.Type.VARIANT: "ANY TYPE", 989 } 990 991 PROPERTIES_LOCATION = { 992 **generator.Generator.PROPERTIES_LOCATION, 993 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 994 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 995 } 996 997 # WINDOW comes after QUALIFY 998 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 999 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1000 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1001 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1002 } 1003 1004 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1005 RESERVED_KEYWORDS = { 1006 "all", 1007 "and", 1008 "any", 1009 "array", 1010 "as", 1011 "asc", 1012 "assert_rows_modified", 1013 "at", 1014 "between", 1015 "by", 1016 "case", 1017 
"cast", 1018 "collate", 1019 "contains", 1020 "create", 1021 "cross", 1022 "cube", 1023 "current", 1024 "default", 1025 "define", 1026 "desc", 1027 "distinct", 1028 "else", 1029 "end", 1030 "enum", 1031 "escape", 1032 "except", 1033 "exclude", 1034 "exists", 1035 "extract", 1036 "false", 1037 "fetch", 1038 "following", 1039 "for", 1040 "from", 1041 "full", 1042 "group", 1043 "grouping", 1044 "groups", 1045 "hash", 1046 "having", 1047 "if", 1048 "ignore", 1049 "in", 1050 "inner", 1051 "intersect", 1052 "interval", 1053 "into", 1054 "is", 1055 "join", 1056 "lateral", 1057 "left", 1058 "like", 1059 "limit", 1060 "lookup", 1061 "merge", 1062 "natural", 1063 "new", 1064 "no", 1065 "not", 1066 "null", 1067 "nulls", 1068 "of", 1069 "on", 1070 "or", 1071 "order", 1072 "outer", 1073 "over", 1074 "partition", 1075 "preceding", 1076 "proto", 1077 "qualify", 1078 "range", 1079 "recursive", 1080 "respect", 1081 "right", 1082 "rollup", 1083 "rows", 1084 "select", 1085 "set", 1086 "some", 1087 "struct", 1088 "tablesample", 1089 "then", 1090 "to", 1091 "treat", 1092 "true", 1093 "unbounded", 1094 "union", 1095 "unnest", 1096 "using", 1097 "when", 1098 "where", 1099 "window", 1100 "with", 1101 "within", 1102 } 1103 1104 def mod_sql(self, expression: exp.Mod) -> str: 1105 this = expression.this 1106 expr = expression.expression 1107 return self.func( 1108 "MOD", 1109 this.unnest() if isinstance(this, exp.Paren) else this, 1110 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1111 ) 1112 1113 def column_parts(self, expression: exp.Column) -> str: 1114 if expression.meta.get("quoted_column"): 1115 # If a column reference is of the form `dataset.table`.name, we need 1116 # to preserve the quoted table path, otherwise the reference breaks 1117 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1118 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1119 return f"{table_path}.{self.sql(expression, 'this')}" 1120 1121 return super().column_parts(expression) 1122 1123 def table_parts(self, expression: exp.Table) -> str: 1124 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1125 # we need to make sure the correct quoting is used in each case. 
1126 # 1127 # For example, if there is a CTE x that clashes with a schema name, then the former will 1128 # return the table y in that schema, whereas the latter will return the CTE's y column: 1129 # 1130 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1131 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1132 if expression.meta.get("quoted_table"): 1133 table_parts = ".".join(p.name for p in expression.parts) 1134 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1135 1136 return super().table_parts(expression) 1137 1138 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1139 this = expression.this 1140 if isinstance(this, exp.TsOrDsToDatetime): 1141 func_name = "FORMAT_DATETIME" 1142 elif isinstance(this, exp.TsOrDsToTimestamp): 1143 func_name = "FORMAT_TIMESTAMP" 1144 else: 1145 func_name = "FORMAT_DATE" 1146 1147 time_expr = ( 1148 this 1149 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1150 else expression 1151 ) 1152 return self.func(func_name, self.format_time(expression), time_expr.this) 1153 1154 def eq_sql(self, expression: exp.EQ) -> str: 1155 # Operands of = cannot be NULL in BigQuery 1156 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1157 if not isinstance(expression.parent, exp.Update): 1158 return "NULL" 1159 1160 return self.binary(expression, "=") 1161 1162 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1163 parent = expression.parent 1164 1165 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1166 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1167 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1168 return self.func( 1169 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1170 ) 1171 1172 return super().attimezone_sql(expression) 1173 1174 def trycast_sql(self, expression: exp.TryCast) -> str: 1175 return self.cast_sql(expression, safe_prefix="SAFE_") 1176 1177 def bracket_sql(self, expression: exp.Bracket) -> str: 1178 this = expression.this 1179 expressions = expression.expressions 1180 1181 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1182 arg = expressions[0] 1183 if arg.type is None: 1184 from sqlglot.optimizer.annotate_types import annotate_types 1185 1186 arg = annotate_types(arg) 1187 1188 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1189 # BQ doesn't support bracket syntax with string values for structs 1190 return f"{self.sql(this)}.{arg.name}" 1191 1192 expressions_sql = self.expressions(expression, flat=True) 1193 offset = expression.args.get("offset") 1194 1195 if offset == 0: 1196 expressions_sql = f"OFFSET({expressions_sql})" 1197 elif offset == 1: 1198 expressions_sql = f"ORDINAL({expressions_sql})" 1199 elif offset is not None: 1200 self.unsupported(f"Unsupported array offset: {offset}") 1201 1202 if expression.args.get("safe"): 1203 expressions_sql = f"SAFE_{expressions_sql}" 1204 1205 return f"{self.sql(this)}[{expressions_sql}]" 1206 1207 def in_unnest_op(self, expression: exp.Unnest) -> str: 1208 return self.sql(expression) 1209 1210 def version_sql(self, expression: exp.Version) -> str: 1211 if expression.name == "TIMESTAMP": 1212 expression.set("this", "SYSTEM_TIME") 1213 return super().version_sql(expression) 1214 1215 def contains_sql(self, expression: exp.Contains) -> str: 1216 this = expression.this 1217 expr 
= expression.expression 1218 1219 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1220 this = this.this 1221 expr = expr.this 1222 1223 return self.func("CONTAINS_SUBSTRING", this, expr)
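A couple of the TRANSFORMS and TYPE_MAPPING entries above, exercised through transpilation (a sketch; the comment shows the expected shape of the output):

import sqlglot

# CountIf is renamed to COUNTIF, and parameterized string types collapse to STRING.
print(sqlglot.transpile("SELECT COUNT_IF(y > 0), CAST(x AS VARCHAR(10))", write="bigquery")[0])
# e.g. SELECT COUNTIF(y > 0), CAST(x AS STRING)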
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
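As a usage sketch, these options are forwarded through Expression.sql (or sqlglot.transpile):

import sqlglot

e = sqlglot.parse_one("SELECT a, b FROM t WHERE a > 1", read="bigquery")
# Pretty-printed, with identifiers quoted using BigQuery's backticks.
print(e.sql(dialect="bigquery", pretty=True, identify=True))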
1113 def column_parts(self, expression: exp.Column) -> str: 1114 if expression.meta.get("quoted_column"): 1115 # If a column reference is of the form `dataset.table`.name, we need 1116 # to preserve the quoted table path, otherwise the reference breaks 1117 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1118 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1119 return f"{table_path}.{self.sql(expression, 'this')}" 1120 1121 return super().column_parts(expression)
1123 def table_parts(self, expression: exp.Table) -> str: 1124 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1125 # we need to make sure the correct quoting is used in each case. 1126 # 1127 # For example, if there is a CTE x that clashes with a schema name, then the former will 1128 # return the table y in that schema, whereas the latter will return the CTE's y column: 1129 # 1130 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1131 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1132 if expression.meta.get("quoted_table"): 1133 table_parts = ".".join(p.name for p in expression.parts) 1134 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1135 1136 return super().table_parts(expression)
1138 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1139 this = expression.this 1140 if isinstance(this, exp.TsOrDsToDatetime): 1141 func_name = "FORMAT_DATETIME" 1142 elif isinstance(this, exp.TsOrDsToTimestamp): 1143 func_name = "FORMAT_TIMESTAMP" 1144 else: 1145 func_name = "FORMAT_DATE" 1146 1147 time_expr = ( 1148 this 1149 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1150 else expression 1151 ) 1152 return self.func(func_name, self.format_time(expression), time_expr.this)
1154 def eq_sql(self, expression: exp.EQ) -> str: 1155 # Operands of = cannot be NULL in BigQuery 1156 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1157 if not isinstance(expression.parent, exp.Update): 1158 return "NULL" 1159 1160 return self.binary(expression, "=")
1162 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1163 parent = expression.parent 1164 1165 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1166 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1167 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1168 return self.func( 1169 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1170 ) 1171 1172 return super().attimezone_sql(expression)
1177 def bracket_sql(self, expression: exp.Bracket) -> str: 1178 this = expression.this 1179 expressions = expression.expressions 1180 1181 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1182 arg = expressions[0] 1183 if arg.type is None: 1184 from sqlglot.optimizer.annotate_types import annotate_types 1185 1186 arg = annotate_types(arg) 1187 1188 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1189 # BQ doesn't support bracket syntax with string values for structs 1190 return f"{self.sql(this)}.{arg.name}" 1191 1192 expressions_sql = self.expressions(expression, flat=True) 1193 offset = expression.args.get("offset") 1194 1195 if offset == 0: 1196 expressions_sql = f"OFFSET({expressions_sql})" 1197 elif offset == 1: 1198 expressions_sql = f"ORDINAL({expressions_sql})" 1199 elif offset is not None: 1200 self.unsupported(f"Unsupported array offset: {offset}") 1201 1202 if expression.args.get("safe"): 1203 expressions_sql = f"SAFE_{expressions_sql}" 1204 1205 return f"{self.sql(this)}[{expressions_sql}]"
1215 def contains_sql(self, expression: exp.Contains) -> str: 1216 this = expression.this 1217 expr = expression.expression 1218 1219 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1220 this = this.this 1221 expr = expr.this 1222 1223 return self.func("CONTAINS_SUBSTRING", this, expr)
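Two of the method overrides above, observed through transpilation (a sketch; expected outputs in comments):

import sqlglot

# eq_sql: comparisons against NULL can never be true in BigQuery, so they fold to NULL.
print(sqlglot.transpile("SELECT x = NULL", write="bigquery")[0])  # SELECT NULL

# mod_sql: BigQuery has no % operator; it is rendered as the MOD function.
print(sqlglot.transpile("SELECT 7 % 2", write="bigquery")[0])     # SELECT MOD(7, 2)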
Inherited Members
- sqlglot.generator.Generator (all other members are inherited unchanged from the base Generator)