sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 NormalizationStrategy, 11 arg_max_or_min_no_count, 12 binary_from_function, 13 date_add_interval_sql, 14 datestrtodate_sql, 15 build_formatted_time, 16 filter_array_using_unnest, 17 if_sql, 18 inline_array_unless_query, 19 max_or_greatest, 20 min_or_least, 21 no_ilike_sql, 22 build_date_delta_with_interval, 23 regexp_replace_sql, 24 rename_func, 25 sha256_sql, 26 timestrtotime_sql, 27 ts_or_ds_add_cast, 28 unit_to_var, 29 strposition_sql, 30 groupconcat_sql, 31) 32from sqlglot.helper import seq_get, split_num_words 33from sqlglot.tokens import TokenType 34from sqlglot.generator import unsupported_args 35 36if t.TYPE_CHECKING: 37 from sqlglot._typing import E, Lit 38 39 from sqlglot.optimizer.annotate_types import TypeAnnotator 40 41logger = logging.getLogger("sqlglot") 42 43 44JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 45 46DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 47 48 49def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 50 if not expression.find_ancestor(exp.From, exp.Join): 51 return self.values_sql(expression) 52 53 structs = [] 54 alias = expression.args.get("alias") 55 for tup in expression.find_all(exp.Tuple): 56 field_aliases = ( 57 alias.columns 58 if alias and alias.columns 59 else (f"_c{i}" for i in range(len(tup.expressions))) 60 ) 61 expressions = [ 62 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 63 for name, fld in zip(field_aliases, tup.expressions) 64 ] 65 structs.append(exp.Struct(expressions=expressions)) 66 67 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 68 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 69 return self.unnest_sql( 70 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 71 ) 72 73 74def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 75 this = expression.this 76 if isinstance(this, exp.Schema): 77 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 78 else: 79 this = self.sql(this) 80 return f"RETURNS {this}" 81 82 83def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 84 returns = expression.find(exp.ReturnsProperty) 85 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 86 expression.set("kind", "TABLE FUNCTION") 87 88 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 89 expression.set("expression", expression.expression.this) 90 91 return self.create_sql(expression) 92 93 94# https://issuetracker.google.com/issues/162294746 95# workaround for bigquery bug when grouping by an expression and then ordering 96# WITH x AS (SELECT 1 y) 97# SELECT y + 1 z 98# FROM x 99# GROUP BY x + 1 100# ORDER by z 101def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 102 if isinstance(expression, exp.Select): 103 group = expression.args.get("group") 104 order = expression.args.get("order") 105 106 if group and order: 107 aliases = { 108 select.this: select.args["alias"] 109 for select in expression.selects 110 if isinstance(select, exp.Alias) 111 } 112 113 for grouped in group.expressions: 114 if grouped.is_int: 115 continue 116 alias = aliases.get(grouped) 117 if alias: 118 grouped.replace(exp.column(alias)) 119 120 return expression 121 122 123def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 124 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 125 if isinstance(expression, exp.CTE) and expression.alias_column_names: 126 cte_query = expression.this 127 128 if cte_query.is_star: 129 logger.warning( 130 "Can't push down CTE column names for star queries. Run the query through" 131 " the optimizer or use 'qualify' to expand the star projections first." 132 ) 133 return expression 134 135 column_names = expression.alias_column_names 136 expression.args["alias"].set("columns", None) 137 138 for name, select in zip(column_names, cte_query.selects): 139 to_replace = select 140 141 if isinstance(select, exp.Alias): 142 select = select.this 143 144 # Inner aliases are shadowed by the CTE column names 145 to_replace.replace(exp.alias_(select, name)) 146 147 return expression 148 149 150def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 151 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 152 this.set("zone", seq_get(args, 2)) 153 return this 154 155 156def _build_timestamp(args: t.List) -> exp.Timestamp: 157 timestamp = exp.Timestamp.from_arg_list(args) 158 timestamp.set("with_tz", True) 159 return timestamp 160 161 162def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 163 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 164 return expr_type.from_arg_list(args) 165 166 167def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 168 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 169 arg = seq_get(args, 0) 170 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 171 172 173def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 174 return self.sql( 175 exp.Exists( 176 this=exp.select("1") 177 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 178 .where(exp.column("_col").eq(expression.right)) 179 ) 180 ) 181 182 183def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 184 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 185 186 187def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 188 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 189 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 190 unit = unit_to_var(expression) 191 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 192 193 194def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 195 scale = expression.args.get("scale") 196 timestamp = expression.this 197 198 if scale in (None, exp.UnixToTime.SECONDS): 199 return self.func("TIMESTAMP_SECONDS", timestamp) 200 if scale == exp.UnixToTime.MILLIS: 201 return self.func("TIMESTAMP_MILLIS", timestamp) 202 if scale == exp.UnixToTime.MICROS: 203 return self.func("TIMESTAMP_MICROS", timestamp) 204 205 unix_seconds = exp.cast( 206 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 207 ) 208 return self.func("TIMESTAMP_SECONDS", unix_seconds) 209 210 211def _build_time(args: t.List) -> exp.Func: 212 if len(args) == 1: 213 return exp.TsOrDsToTime(this=args[0]) 214 if len(args) == 2: 215 return exp.Time.from_arg_list(args) 216 return exp.TimeFromParts.from_arg_list(args) 217 218 219def _build_datetime(args: t.List) -> exp.Func: 220 if len(args) == 1: 221 return exp.TsOrDsToDatetime.from_arg_list(args) 222 if len(args) == 2: 223 return exp.Datetime.from_arg_list(args) 224 return exp.TimestampFromParts.from_arg_list(args) 225 226 227def _build_regexp_extract( 228 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 229) -> t.Callable[[t.List], E]: 230 def _builder(args: t.List) -> E: 231 try: 232 group = re.compile(args[1].name).groups == 1 233 except re.error: 234 group = False 235 236 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 237 return expr_type( 238 this=seq_get(args, 0), 239 expression=seq_get(args, 1), 240 position=seq_get(args, 2), 241 occurrence=seq_get(args, 3), 242 group=exp.Literal.number(1) if group else default_group, 243 ) 244 245 return _builder 246 247 248def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 249 def _builder(args: t.List, dialect: Dialect) -> E: 250 if len(args) == 1: 251 # The default value for the JSONPath is '$' i.e all of the data 252 args.append(exp.Literal.string("$")) 253 return parser.build_extract_json_with_path(expr_type)(args, dialect) 254 255 return _builder 256 257 258def _str_to_datetime_sql( 259 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 260) -> str: 261 this = self.sql(expression, "this") 262 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 263 264 if expression.args.get("safe"): 265 fmt = self.format_time( 266 expression, 267 self.dialect.INVERSE_FORMAT_MAPPING, 268 self.dialect.INVERSE_FORMAT_TRIE, 269 ) 270 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 271 272 fmt = self.format_time(expression) 273 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 274 275 276def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 277 """ 278 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 279 +---------+---------+---------+------------+---------+ 280 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 281 +---------+---------+---------+------------+---------+ 282 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 283 +---------+---------+---------+------------+---------+ 284 """ 285 self._annotate_args(expression) 286 287 this: exp.Expression = expression.this 288 289 self._set_type( 290 expression, 291 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 292 ) 293 return expression 294 295 296@unsupported_args("ins_cost", "del_cost", "sub_cost") 297def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 298 max_dist = expression.args.get("max_dist") 299 if max_dist: 300 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 301 302 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 303 304 305def _build_levenshtein(args: t.List) -> exp.Levenshtein: 306 max_dist = seq_get(args, 2) 307 return exp.Levenshtein( 308 this=seq_get(args, 0), 309 expression=seq_get(args, 1), 310 max_dist=max_dist.expression if max_dist else None, 311 ) 312 313 314def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 315 def _builder(args: t.List) -> exp.TimeToStr: 316 return exp.TimeToStr(this=expr_type(this=seq_get(args, 1)), format=seq_get(args, 0)) 317 318 return _builder 319 320 321def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous: 322 if len(args) == 3: 323 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 324 325 # Lowercase the operands in case of transpilation, as exp.Contains 326 # is case-sensitive on other dialects 327 this = exp.Lower(this=seq_get(args, 0)) 328 expr = exp.Lower(this=seq_get(args, 1)) 329 330 return exp.Contains(this=this, expression=expr) 331 332 333def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 334 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 335 upper = name.upper() 336 337 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 338 339 if dquote_escaping: 340 self._quote_json_path_key_using_brackets = False 341 342 sql = rename_func(upper)(self, expression) 343 344 if dquote_escaping: 345 self._quote_json_path_key_using_brackets = True 346 347 return sql 348 349 350class BigQuery(Dialect): 351 WEEK_OFFSET = -1 352 UNNEST_COLUMN_ONLY = True 353 SUPPORTS_USER_DEFINED_TYPES = False 354 SUPPORTS_SEMI_ANTI_JOIN = False 355 LOG_BASE_FIRST = False 356 HEX_LOWERCASE = True 357 FORCE_EARLY_ALIAS_REF_EXPANSION = True 358 PRESERVE_ORIGINAL_NAMES = True 359 HEX_STRING_IS_INTEGER_TYPE = True 360 361 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 362 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 363 364 # bigquery udfs are case sensitive 365 NORMALIZE_FUNCTIONS = False 366 367 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 368 TIME_MAPPING = { 369 "%D": "%m/%d/%y", 370 "%E6S": "%S.%f", 371 "%e": "%-d", 372 } 373 374 FORMAT_MAPPING = { 375 "DD": "%d", 376 "MM": "%m", 377 "MON": "%b", 378 "MONTH": "%B", 379 "YYYY": "%Y", 380 "YY": "%y", 381 "HH": "%I", 382 "HH12": "%I", 383 "HH24": "%H", 384 "MI": "%M", 385 "SS": "%S", 386 "SSSSS": "%f", 387 "TZH": "%z", 388 } 389 390 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 391 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 392 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 393 394 # All set operations require either a DISTINCT or ALL specifier 395 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 396 397 ANNOTATORS = { 398 **Dialect.ANNOTATORS, 399 **{ 400 expr_type: lambda self, e: _annotate_math_functions(self, e) 401 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 402 }, 403 **{ 404 expr_type: lambda self, e: self._annotate_by_args(e, "this") 405 for expr_type in ( 406 exp.Left, 407 exp.Right, 408 exp.Lower, 409 exp.Upper, 410 exp.Pad, 411 exp.Trim, 412 exp.RegexpExtract, 413 exp.RegexpReplace, 414 exp.Repeat, 415 exp.Substring, 416 ) 417 }, 418 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 419 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 420 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 421 } 422 423 def normalize_identifier(self, expression: E) -> E: 424 if ( 425 isinstance(expression, exp.Identifier) 426 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 427 ): 428 parent = expression.parent 429 while isinstance(parent, exp.Dot): 430 parent = parent.parent 431 432 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 433 # by default. The following check uses a heuristic to detect tables based on whether 434 # they are qualified. This should generally be correct, because tables in BigQuery 435 # must be qualified with at least a dataset, unless @@dataset_id is set. 436 case_sensitive = ( 437 isinstance(parent, exp.UserDefinedFunction) 438 or ( 439 isinstance(parent, exp.Table) 440 and parent.db 441 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 442 ) 443 or expression.meta.get("is_table") 444 ) 445 if not case_sensitive: 446 expression.set("this", expression.this.lower()) 447 448 return expression 449 450 class Tokenizer(tokens.Tokenizer): 451 QUOTES = ["'", '"', '"""', "'''"] 452 COMMENTS = ["--", "#", ("/*", "*/")] 453 IDENTIFIERS = ["`"] 454 STRING_ESCAPES = ["\\"] 455 456 HEX_STRINGS = [("0x", ""), ("0X", "")] 457 458 BYTE_STRINGS = [ 459 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 460 ] 461 462 RAW_STRINGS = [ 463 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 464 ] 465 466 KEYWORDS = { 467 **tokens.Tokenizer.KEYWORDS, 468 "ANY TYPE": TokenType.VARIANT, 469 "BEGIN": TokenType.COMMAND, 470 "BEGIN TRANSACTION": TokenType.BEGIN, 471 "BYTEINT": TokenType.INT, 472 "BYTES": TokenType.BINARY, 473 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 474 "DATETIME": TokenType.TIMESTAMP, 475 "DECLARE": TokenType.COMMAND, 476 "ELSEIF": TokenType.COMMAND, 477 "EXCEPTION": TokenType.COMMAND, 478 "EXPORT": TokenType.EXPORT, 479 "FLOAT64": TokenType.DOUBLE, 480 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 481 "MODEL": TokenType.MODEL, 482 "NOT DETERMINISTIC": TokenType.VOLATILE, 483 "RECORD": TokenType.STRUCT, 484 "TIMESTAMP": TokenType.TIMESTAMPTZ, 485 } 486 KEYWORDS.pop("DIV") 487 KEYWORDS.pop("VALUES") 488 KEYWORDS.pop("/*+") 489 490 class Parser(parser.Parser): 491 PREFIXED_PIVOT_COLUMNS = True 492 LOG_DEFAULTS_TO_LN = True 493 SUPPORTS_IMPLICIT_UNNEST = True 494 495 FUNCTIONS = { 496 **parser.Parser.FUNCTIONS, 497 "CONTAINS_SUBSTR": _build_contains_substring, 498 "DATE": _build_date, 499 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 500 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 501 "DATE_TRUNC": lambda args: exp.DateTrunc( 502 unit=exp.Literal.string(str(seq_get(args, 1))), 503 this=seq_get(args, 0), 504 zone=seq_get(args, 2), 505 ), 506 "DATETIME": _build_datetime, 507 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 508 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 509 "DIV": binary_from_function(exp.IntDiv), 510 "EDIT_DISTANCE": _build_levenshtein, 511 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 512 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 513 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 514 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 515 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 516 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 517 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 518 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 519 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 520 "MD5": exp.MD5Digest.from_arg_list, 521 "TO_HEX": _build_to_hex, 522 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 523 [seq_get(args, 1), seq_get(args, 0)] 524 ), 525 "PARSE_TIMESTAMP": _build_parse_timestamp, 526 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 527 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 528 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 529 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 530 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 531 ), 532 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 533 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 534 "SPLIT": lambda args: exp.Split( 535 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 536 this=seq_get(args, 0), 537 expression=seq_get(args, 1) or exp.Literal.string(","), 538 ), 539 "STRPOS": exp.StrPosition.from_arg_list, 540 "TIME": _build_time, 541 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 542 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 543 "TIMESTAMP": _build_timestamp, 544 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 545 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 546 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 547 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 548 ), 549 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 550 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 551 ), 552 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 553 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 554 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 555 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 556 } 557 558 FUNCTION_PARSERS = { 559 **parser.Parser.FUNCTION_PARSERS, 560 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 561 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 562 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 563 } 564 FUNCTION_PARSERS.pop("TRIM") 565 566 NO_PAREN_FUNCTIONS = { 567 **parser.Parser.NO_PAREN_FUNCTIONS, 568 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 569 } 570 571 NESTED_TYPE_TOKENS = { 572 *parser.Parser.NESTED_TYPE_TOKENS, 573 TokenType.TABLE, 574 } 575 576 PROPERTY_PARSERS = { 577 **parser.Parser.PROPERTY_PARSERS, 578 "NOT DETERMINISTIC": lambda self: self.expression( 579 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 580 ), 581 "OPTIONS": lambda self: self._parse_with_property(), 582 } 583 584 CONSTRAINT_PARSERS = { 585 **parser.Parser.CONSTRAINT_PARSERS, 586 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 587 } 588 589 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 590 RANGE_PARSERS.pop(TokenType.OVERLAPS) 591 592 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 593 594 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 595 596 STATEMENT_PARSERS = { 597 **parser.Parser.STATEMENT_PARSERS, 598 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 599 TokenType.END: lambda self: self._parse_as_command(self._prev), 600 TokenType.FOR: lambda self: self._parse_for_in(), 601 TokenType.EXPORT: lambda self: self._parse_export_data(), 602 } 603 604 BRACKET_OFFSETS = { 605 "OFFSET": (0, False), 606 "ORDINAL": (1, False), 607 "SAFE_OFFSET": (0, True), 608 "SAFE_ORDINAL": (1, True), 609 } 610 611 def _parse_for_in(self) -> exp.ForIn: 612 this = self._parse_range() 613 self._match_text_seq("DO") 614 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 615 616 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 617 this = super()._parse_table_part(schema=schema) or self._parse_number() 618 619 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 620 if isinstance(this, exp.Identifier): 621 table_name = this.name 622 while self._match(TokenType.DASH, advance=False) and self._next: 623 start = self._curr 624 while self._is_connected() and not self._match_set( 625 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 626 ): 627 self._advance() 628 629 table_name += self._find_sql(start, self._prev) 630 631 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 632 elif isinstance(this, exp.Literal): 633 table_name = this.name 634 635 if self._is_connected() and self._parse_var(any_token=True): 636 table_name += self._prev.text 637 638 this = exp.Identifier(this=table_name, quoted=True) 639 640 return this 641 642 def _parse_table_parts( 643 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 644 ) -> exp.Table: 645 table = super()._parse_table_parts( 646 schema=schema, is_db_reference=is_db_reference, wildcard=True 647 ) 648 649 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 650 if not table.catalog: 651 if table.db: 652 parts = table.db.split(".") 653 if len(parts) == 2 and not table.args["db"].quoted: 654 table.set("catalog", exp.Identifier(this=parts[0])) 655 table.set("db", exp.Identifier(this=parts[1])) 656 else: 657 parts = table.name.split(".") 658 if len(parts) == 2 and not table.this.quoted: 659 table.set("db", exp.Identifier(this=parts[0])) 660 table.set("this", exp.Identifier(this=parts[1])) 661 662 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 663 alias = table.this 664 catalog, db, this, *rest = ( 665 exp.to_identifier(p, quoted=True) 666 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 667 ) 668 669 if rest and this: 670 this = exp.Dot.build([this, *rest]) # type: ignore 671 672 table = exp.Table( 673 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 674 ) 675 table.meta["quoted_table"] = True 676 else: 677 alias = None 678 679 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 680 # dataset, so if the project identifier is omitted we need to fix the ast so that 681 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 682 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 683 # views, because it would seem like the "catalog" part is set, when it'd actually 684 # be the region/dataset. Merging the two identifiers into a single one is done to 685 # avoid producing a 4-part Table reference, which would cause issues in the schema 686 # module, when there are 3-part table names mixed with information schema views. 687 # 688 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 689 table_parts = table.parts 690 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 691 # We need to alias the table here to avoid breaking existing qualified columns. 692 # This is expected to be safe, because if there's an actual alias coming up in 693 # the token stream, it will overwrite this one. If there isn't one, we are only 694 # exposing the name that can be used to reference the view explicitly (a no-op). 695 exp.alias_( 696 table, 697 t.cast(exp.Identifier, alias or table_parts[-1]), 698 table=True, 699 copy=False, 700 ) 701 702 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 703 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 704 table.set("db", seq_get(table_parts, -3)) 705 table.set("catalog", seq_get(table_parts, -4)) 706 707 return table 708 709 def _parse_column(self) -> t.Optional[exp.Expression]: 710 column = super()._parse_column() 711 if isinstance(column, exp.Column): 712 parts = column.parts 713 if any("." in p.name for p in parts): 714 catalog, db, table, this, *rest = ( 715 exp.to_identifier(p, quoted=True) 716 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 717 ) 718 719 if rest and this: 720 this = exp.Dot.build([this, *rest]) # type: ignore 721 722 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 723 column.meta["quoted_column"] = True 724 725 return column 726 727 @t.overload 728 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 729 730 @t.overload 731 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 732 733 def _parse_json_object(self, agg=False): 734 json_object = super()._parse_json_object() 735 array_kv_pair = seq_get(json_object.expressions, 0) 736 737 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 738 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 739 if ( 740 array_kv_pair 741 and isinstance(array_kv_pair.this, exp.Array) 742 and isinstance(array_kv_pair.expression, exp.Array) 743 ): 744 keys = array_kv_pair.this.expressions 745 values = array_kv_pair.expression.expressions 746 747 json_object.set( 748 "expressions", 749 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 750 ) 751 752 return json_object 753 754 def _parse_bracket( 755 self, this: t.Optional[exp.Expression] = None 756 ) -> t.Optional[exp.Expression]: 757 bracket = super()._parse_bracket(this) 758 759 if this is bracket: 760 return bracket 761 762 if isinstance(bracket, exp.Bracket): 763 for expression in bracket.expressions: 764 name = expression.name.upper() 765 766 if name not in self.BRACKET_OFFSETS: 767 break 768 769 offset, safe = self.BRACKET_OFFSETS[name] 770 bracket.set("offset", offset) 771 bracket.set("safe", safe) 772 expression.replace(expression.expressions[0]) 773 774 return bracket 775 776 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 777 unnest = super()._parse_unnest(with_alias=with_alias) 778 779 if not unnest: 780 return None 781 782 unnest_expr = seq_get(unnest.expressions, 0) 783 if unnest_expr: 784 from sqlglot.optimizer.annotate_types import annotate_types 785 786 unnest_expr = annotate_types(unnest_expr) 787 788 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 789 # in contrast to other dialects such as DuckDB which flattens only the array by default 790 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 791 array_elem.is_type(exp.DataType.Type.STRUCT) 792 for array_elem in unnest_expr._type.expressions 793 ): 794 unnest.set("explode_array", True) 795 796 return unnest 797 798 def _parse_make_interval(self) -> exp.MakeInterval: 799 expr = exp.MakeInterval() 800 801 for arg_key in expr.arg_types: 802 value = self._parse_lambda() 803 804 if not value: 805 break 806 807 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 808 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 809 if isinstance(value, exp.Kwarg): 810 arg_key = value.this.name 811 812 expr.set(arg_key, value) 813 814 self._match(TokenType.COMMA) 815 816 return expr 817 818 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 819 expr = self.expression( 820 exp.FeaturesAtTime, 821 this=(self._match(TokenType.TABLE) and self._parse_table()) 822 or self._parse_select(nested=True), 823 ) 824 825 while self._match(TokenType.COMMA): 826 arg = self._parse_lambda() 827 828 # Get the LHS of the Kwarg and set the arg to that value, e.g 829 # "num_rows => 1" sets the expr's `num_rows` arg 830 if arg: 831 expr.set(arg.this.name, arg) 832 833 return expr 834 835 def _parse_export_data(self) -> exp.Export: 836 self._match_text_seq("DATA") 837 838 return self.expression( 839 exp.Export, 840 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 841 options=self._parse_properties(), 842 this=self._match_text_seq("AS") and self._parse_select(), 843 ) 844 845 class Generator(generator.Generator): 846 INTERVAL_ALLOWS_PLURAL_FORM = False 847 JOIN_HINTS = False 848 QUERY_HINTS = False 849 TABLE_HINTS = False 850 LIMIT_FETCH = "LIMIT" 851 RENAME_TABLE_WITH_DB = False 852 NVL2_SUPPORTED = False 853 UNNEST_WITH_ORDINALITY = False 854 COLLATE_IS_FUNC = True 855 LIMIT_ONLY_LITERALS = True 856 SUPPORTS_TABLE_ALIAS_COLUMNS = False 857 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 858 JSON_KEY_VALUE_PAIR_SEP = "," 859 NULL_ORDERING_SUPPORTED = False 860 IGNORE_NULLS_IN_FUNC = True 861 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 862 CAN_IMPLEMENT_ARRAY_ANY = True 863 SUPPORTS_TO_NUMBER = False 864 NAMED_PLACEHOLDER_TOKEN = "@" 865 HEX_FUNC = "TO_HEX" 866 WITH_PROPERTIES_PREFIX = "OPTIONS" 867 SUPPORTS_EXPLODING_PROJECTIONS = False 868 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 869 SUPPORTS_UNIX_SECONDS = True 870 871 TRANSFORMS = { 872 **generator.Generator.TRANSFORMS, 873 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 874 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 875 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 876 exp.Array: inline_array_unless_query, 877 exp.ArrayContains: _array_contains_sql, 878 exp.ArrayFilter: filter_array_using_unnest, 879 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 880 exp.CollateProperty: lambda self, e: ( 881 f"DEFAULT COLLATE {self.sql(e, 'this')}" 882 if e.args.get("default") 883 else f"COLLATE {self.sql(e, 'this')}" 884 ), 885 exp.Commit: lambda *_: "COMMIT TRANSACTION", 886 exp.CountIf: rename_func("COUNTIF"), 887 exp.Create: _create_sql, 888 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 889 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 890 exp.DateDiff: lambda self, e: self.func( 891 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 892 ), 893 exp.DateFromParts: rename_func("DATE"), 894 exp.DateStrToDate: datestrtodate_sql, 895 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 896 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 897 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 898 exp.DateTrunc: lambda self, e: self.func( 899 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 900 ), 901 exp.FromTimeZone: lambda self, e: self.func( 902 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 903 ), 904 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 905 exp.GroupConcat: lambda self, e: groupconcat_sql( 906 self, e, func_name="STRING_AGG", within_group=False 907 ), 908 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 909 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 910 exp.If: if_sql(false_value="NULL"), 911 exp.ILike: no_ilike_sql, 912 exp.IntDiv: rename_func("DIV"), 913 exp.Int64: rename_func("INT64"), 914 exp.JSONExtract: _json_extract_sql, 915 exp.JSONExtractArray: _json_extract_sql, 916 exp.JSONExtractScalar: _json_extract_sql, 917 exp.JSONFormat: rename_func("TO_JSON_STRING"), 918 exp.Levenshtein: _levenshtein_sql, 919 exp.Max: max_or_greatest, 920 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 921 exp.MD5Digest: rename_func("MD5"), 922 exp.Min: min_or_least, 923 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 924 exp.RegexpExtract: lambda self, e: self.func( 925 "REGEXP_EXTRACT", 926 e.this, 927 e.expression, 928 e.args.get("position"), 929 e.args.get("occurrence"), 930 ), 931 exp.RegexpExtractAll: lambda self, e: self.func( 932 "REGEXP_EXTRACT_ALL", e.this, e.expression 933 ), 934 exp.RegexpReplace: regexp_replace_sql, 935 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 936 exp.ReturnsProperty: _returnsproperty_sql, 937 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 938 exp.Select: transforms.preprocess( 939 [ 940 transforms.explode_to_unnest(), 941 transforms.unqualify_unnest, 942 transforms.eliminate_distinct_on, 943 _alias_ordered_group, 944 transforms.eliminate_semi_and_anti_joins, 945 ] 946 ), 947 exp.SHA: rename_func("SHA1"), 948 exp.SHA2: sha256_sql, 949 exp.StabilityProperty: lambda self, e: ( 950 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 951 ), 952 exp.String: rename_func("STRING"), 953 exp.StrPosition: lambda self, e: ( 954 strposition_sql( 955 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 956 ) 957 ), 958 exp.StrToDate: _str_to_datetime_sql, 959 exp.StrToTime: _str_to_datetime_sql, 960 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 961 exp.TimeFromParts: rename_func("TIME"), 962 exp.TimestampFromParts: rename_func("DATETIME"), 963 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 964 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 965 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 966 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 967 exp.TimeStrToTime: timestrtotime_sql, 968 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 969 exp.TsOrDsAdd: _ts_or_ds_add_sql, 970 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 971 exp.TsOrDsToTime: rename_func("TIME"), 972 exp.TsOrDsToDatetime: rename_func("DATETIME"), 973 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 974 exp.Unhex: rename_func("FROM_HEX"), 975 exp.UnixDate: rename_func("UNIX_DATE"), 976 exp.UnixToTime: _unix_to_time_sql, 977 exp.Uuid: lambda *_: "GENERATE_UUID()", 978 exp.Values: _derived_table_values_to_unnest, 979 exp.VariancePop: rename_func("VAR_POP"), 980 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 981 } 982 983 SUPPORTED_JSON_PATH_PARTS = { 984 exp.JSONPathKey, 985 exp.JSONPathRoot, 986 exp.JSONPathSubscript, 987 } 988 989 TYPE_MAPPING = { 990 **generator.Generator.TYPE_MAPPING, 991 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 992 exp.DataType.Type.BIGINT: "INT64", 993 exp.DataType.Type.BINARY: "BYTES", 994 exp.DataType.Type.BLOB: "BYTES", 995 exp.DataType.Type.BOOLEAN: "BOOL", 996 exp.DataType.Type.CHAR: "STRING", 997 exp.DataType.Type.DECIMAL: "NUMERIC", 998 exp.DataType.Type.DOUBLE: "FLOAT64", 999 exp.DataType.Type.FLOAT: "FLOAT64", 1000 exp.DataType.Type.INT: "INT64", 1001 exp.DataType.Type.NCHAR: "STRING", 1002 exp.DataType.Type.NVARCHAR: "STRING", 1003 exp.DataType.Type.SMALLINT: "INT64", 1004 exp.DataType.Type.TEXT: "STRING", 1005 exp.DataType.Type.TIMESTAMP: "DATETIME", 1006 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1007 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1008 exp.DataType.Type.TINYINT: "INT64", 1009 exp.DataType.Type.ROWVERSION: "BYTES", 1010 exp.DataType.Type.UUID: "STRING", 1011 exp.DataType.Type.VARBINARY: "BYTES", 1012 exp.DataType.Type.VARCHAR: "STRING", 1013 exp.DataType.Type.VARIANT: "ANY TYPE", 1014 } 1015 1016 PROPERTIES_LOCATION = { 1017 **generator.Generator.PROPERTIES_LOCATION, 1018 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1019 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1020 } 1021 1022 # WINDOW comes after QUALIFY 1023 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1024 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1025 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1026 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1027 } 1028 1029 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1030 RESERVED_KEYWORDS = { 1031 "all", 1032 "and", 1033 "any", 1034 "array", 1035 "as", 1036 "asc", 1037 "assert_rows_modified", 1038 "at", 1039 "between", 1040 "by", 1041 "case", 1042 "cast", 1043 "collate", 1044 "contains", 1045 "create", 1046 "cross", 1047 "cube", 1048 "current", 1049 "default", 1050 "define", 1051 "desc", 1052 "distinct", 1053 "else", 1054 "end", 1055 "enum", 1056 "escape", 1057 "except", 1058 "exclude", 1059 "exists", 1060 "extract", 1061 "false", 1062 "fetch", 1063 "following", 1064 "for", 1065 "from", 1066 "full", 1067 "group", 1068 "grouping", 1069 "groups", 1070 "hash", 1071 "having", 1072 "if", 1073 "ignore", 1074 "in", 1075 "inner", 1076 "intersect", 1077 "interval", 1078 "into", 1079 "is", 1080 "join", 1081 "lateral", 1082 "left", 1083 "like", 1084 "limit", 1085 "lookup", 1086 "merge", 1087 "natural", 1088 "new", 1089 "no", 1090 "not", 1091 "null", 1092 "nulls", 1093 "of", 1094 "on", 1095 "or", 1096 "order", 1097 "outer", 1098 "over", 1099 "partition", 1100 "preceding", 1101 "proto", 1102 "qualify", 1103 "range", 1104 "recursive", 1105 "respect", 1106 "right", 1107 "rollup", 1108 "rows", 1109 "select", 1110 "set", 1111 "some", 1112 "struct", 1113 "tablesample", 1114 "then", 1115 "to", 1116 "treat", 1117 "true", 1118 "unbounded", 1119 "union", 1120 "unnest", 1121 "using", 1122 "when", 1123 "where", 1124 "window", 1125 "with", 1126 "within", 1127 } 1128 1129 def mod_sql(self, expression: exp.Mod) -> str: 1130 this = expression.this 1131 expr = expression.expression 1132 return self.func( 1133 "MOD", 1134 this.unnest() if isinstance(this, exp.Paren) else this, 1135 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1136 ) 1137 1138 def column_parts(self, expression: exp.Column) -> str: 1139 if expression.meta.get("quoted_column"): 1140 # If a column reference is of the form `dataset.table`.name, we need 1141 # to preserve the quoted table path, otherwise the reference breaks 1142 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1143 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1144 return f"{table_path}.{self.sql(expression, 'this')}" 1145 1146 return super().column_parts(expression) 1147 1148 def table_parts(self, expression: exp.Table) -> str: 1149 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1150 # we need to make sure the correct quoting is used in each case. 1151 # 1152 # For example, if there is a CTE x that clashes with a schema name, then the former will 1153 # return the table y in that schema, whereas the latter will return the CTE's y column: 1154 # 1155 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1156 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1157 if expression.meta.get("quoted_table"): 1158 table_parts = ".".join(p.name for p in expression.parts) 1159 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1160 1161 return super().table_parts(expression) 1162 1163 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1164 this = expression.this 1165 if isinstance(this, exp.TsOrDsToDatetime): 1166 func_name = "FORMAT_DATETIME" 1167 elif isinstance(this, exp.TsOrDsToTimestamp): 1168 func_name = "FORMAT_TIMESTAMP" 1169 else: 1170 func_name = "FORMAT_DATE" 1171 1172 time_expr = ( 1173 this 1174 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1175 else expression 1176 ) 1177 return self.func(func_name, self.format_time(expression), time_expr.this) 1178 1179 def eq_sql(self, expression: exp.EQ) -> str: 1180 # Operands of = cannot be NULL in BigQuery 1181 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1182 if not isinstance(expression.parent, exp.Update): 1183 return "NULL" 1184 1185 return self.binary(expression, "=") 1186 1187 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1188 parent = expression.parent 1189 1190 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1191 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1192 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1193 return self.func( 1194 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1195 ) 1196 1197 return super().attimezone_sql(expression) 1198 1199 def trycast_sql(self, expression: exp.TryCast) -> str: 1200 return self.cast_sql(expression, safe_prefix="SAFE_") 1201 1202 def bracket_sql(self, expression: exp.Bracket) -> str: 1203 this = expression.this 1204 expressions = expression.expressions 1205 1206 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1207 arg = expressions[0] 1208 if arg.type is None: 1209 from sqlglot.optimizer.annotate_types import annotate_types 1210 1211 arg = annotate_types(arg) 1212 1213 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1214 # BQ doesn't support bracket syntax with string values for structs 1215 return f"{self.sql(this)}.{arg.name}" 1216 1217 expressions_sql = self.expressions(expression, flat=True) 1218 offset = expression.args.get("offset") 1219 1220 if offset == 0: 1221 expressions_sql = f"OFFSET({expressions_sql})" 1222 elif offset == 1: 1223 expressions_sql = f"ORDINAL({expressions_sql})" 1224 elif offset is not None: 1225 self.unsupported(f"Unsupported array offset: {offset}") 1226 1227 if expression.args.get("safe"): 1228 expressions_sql = f"SAFE_{expressions_sql}" 1229 1230 return f"{self.sql(this)}[{expressions_sql}]" 1231 1232 def in_unnest_op(self, expression: exp.Unnest) -> str: 1233 return self.sql(expression) 1234 1235 def version_sql(self, expression: exp.Version) -> str: 1236 if expression.name == "TIMESTAMP": 1237 expression.set("this", "SYSTEM_TIME") 1238 return super().version_sql(expression) 1239 1240 def contains_sql(self, expression: exp.Contains) -> str: 1241 this = expression.this 1242 expr = expression.expression 1243 1244 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1245 this = this.this 1246 expr = expr.this 1247 1248 return self.func("CONTAINS_SUBSTR", this, expr) 1249 1250 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1251 this = expression.this 1252 1253 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1254 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1255 # because they aren't literals and so the above syntax is invalid BigQuery. 1256 if isinstance(this, exp.Array): 1257 elem = seq_get(this.expressions, 0) 1258 if not (elem and elem.find(exp.Query)): 1259 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1260 1261 return super().cast_sql(expression, safe_prefix=safe_prefix)
351class BigQuery(Dialect): 352 WEEK_OFFSET = -1 353 UNNEST_COLUMN_ONLY = True 354 SUPPORTS_USER_DEFINED_TYPES = False 355 SUPPORTS_SEMI_ANTI_JOIN = False 356 LOG_BASE_FIRST = False 357 HEX_LOWERCASE = True 358 FORCE_EARLY_ALIAS_REF_EXPANSION = True 359 PRESERVE_ORIGINAL_NAMES = True 360 HEX_STRING_IS_INTEGER_TYPE = True 361 362 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 363 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 364 365 # bigquery udfs are case sensitive 366 NORMALIZE_FUNCTIONS = False 367 368 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 369 TIME_MAPPING = { 370 "%D": "%m/%d/%y", 371 "%E6S": "%S.%f", 372 "%e": "%-d", 373 } 374 375 FORMAT_MAPPING = { 376 "DD": "%d", 377 "MM": "%m", 378 "MON": "%b", 379 "MONTH": "%B", 380 "YYYY": "%Y", 381 "YY": "%y", 382 "HH": "%I", 383 "HH12": "%I", 384 "HH24": "%H", 385 "MI": "%M", 386 "SS": "%S", 387 "SSSSS": "%f", 388 "TZH": "%z", 389 } 390 391 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 392 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 393 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 394 395 # All set operations require either a DISTINCT or ALL specifier 396 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 397 398 ANNOTATORS = { 399 **Dialect.ANNOTATORS, 400 **{ 401 expr_type: lambda self, e: _annotate_math_functions(self, e) 402 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 403 }, 404 **{ 405 expr_type: lambda self, e: self._annotate_by_args(e, "this") 406 for expr_type in ( 407 exp.Left, 408 exp.Right, 409 exp.Lower, 410 exp.Upper, 411 exp.Pad, 412 exp.Trim, 413 exp.RegexpExtract, 414 exp.RegexpReplace, 415 exp.Repeat, 416 exp.Substring, 417 ) 418 }, 419 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 420 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 421 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 422 } 423 424 def normalize_identifier(self, expression: E) -> E: 425 if ( 426 isinstance(expression, exp.Identifier) 427 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 428 ): 429 parent = expression.parent 430 while isinstance(parent, exp.Dot): 431 parent = parent.parent 432 433 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 434 # by default. The following check uses a heuristic to detect tables based on whether 435 # they are qualified. This should generally be correct, because tables in BigQuery 436 # must be qualified with at least a dataset, unless @@dataset_id is set. 437 case_sensitive = ( 438 isinstance(parent, exp.UserDefinedFunction) 439 or ( 440 isinstance(parent, exp.Table) 441 and parent.db 442 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 443 ) 444 or expression.meta.get("is_table") 445 ) 446 if not case_sensitive: 447 expression.set("this", expression.this.lower()) 448 449 return expression 450 451 class Tokenizer(tokens.Tokenizer): 452 QUOTES = ["'", '"', '"""', "'''"] 453 COMMENTS = ["--", "#", ("/*", "*/")] 454 IDENTIFIERS = ["`"] 455 STRING_ESCAPES = ["\\"] 456 457 HEX_STRINGS = [("0x", ""), ("0X", "")] 458 459 BYTE_STRINGS = [ 460 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 461 ] 462 463 RAW_STRINGS = [ 464 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 465 ] 466 467 KEYWORDS = { 468 **tokens.Tokenizer.KEYWORDS, 469 "ANY TYPE": TokenType.VARIANT, 470 "BEGIN": TokenType.COMMAND, 471 "BEGIN TRANSACTION": TokenType.BEGIN, 472 "BYTEINT": TokenType.INT, 473 "BYTES": TokenType.BINARY, 474 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 475 "DATETIME": TokenType.TIMESTAMP, 476 "DECLARE": TokenType.COMMAND, 477 "ELSEIF": TokenType.COMMAND, 478 "EXCEPTION": TokenType.COMMAND, 479 "EXPORT": TokenType.EXPORT, 480 "FLOAT64": TokenType.DOUBLE, 481 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 482 "MODEL": TokenType.MODEL, 483 "NOT DETERMINISTIC": TokenType.VOLATILE, 484 "RECORD": TokenType.STRUCT, 485 "TIMESTAMP": TokenType.TIMESTAMPTZ, 486 } 487 KEYWORDS.pop("DIV") 488 KEYWORDS.pop("VALUES") 489 KEYWORDS.pop("/*+") 490 491 class Parser(parser.Parser): 492 PREFIXED_PIVOT_COLUMNS = True 493 LOG_DEFAULTS_TO_LN = True 494 SUPPORTS_IMPLICIT_UNNEST = True 495 496 FUNCTIONS = { 497 **parser.Parser.FUNCTIONS, 498 "CONTAINS_SUBSTR": _build_contains_substring, 499 "DATE": _build_date, 500 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 501 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 502 "DATE_TRUNC": lambda args: exp.DateTrunc( 503 unit=exp.Literal.string(str(seq_get(args, 1))), 504 this=seq_get(args, 0), 505 zone=seq_get(args, 2), 506 ), 507 "DATETIME": _build_datetime, 508 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 509 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 510 "DIV": binary_from_function(exp.IntDiv), 511 "EDIT_DISTANCE": _build_levenshtein, 512 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 513 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 514 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 515 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 516 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 517 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 518 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 519 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 520 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 521 "MD5": exp.MD5Digest.from_arg_list, 522 "TO_HEX": _build_to_hex, 523 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 524 [seq_get(args, 1), seq_get(args, 0)] 525 ), 526 "PARSE_TIMESTAMP": _build_parse_timestamp, 527 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 528 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 529 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 530 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 531 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 532 ), 533 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 534 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 535 "SPLIT": lambda args: exp.Split( 536 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 537 this=seq_get(args, 0), 538 expression=seq_get(args, 1) or exp.Literal.string(","), 539 ), 540 "STRPOS": exp.StrPosition.from_arg_list, 541 "TIME": _build_time, 542 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 543 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 544 "TIMESTAMP": _build_timestamp, 545 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 546 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 547 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 548 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 549 ), 550 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 551 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 552 ), 553 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 554 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 555 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 556 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 557 } 558 559 FUNCTION_PARSERS = { 560 **parser.Parser.FUNCTION_PARSERS, 561 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 562 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 563 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 564 } 565 FUNCTION_PARSERS.pop("TRIM") 566 567 NO_PAREN_FUNCTIONS = { 568 **parser.Parser.NO_PAREN_FUNCTIONS, 569 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 570 } 571 572 NESTED_TYPE_TOKENS = { 573 *parser.Parser.NESTED_TYPE_TOKENS, 574 TokenType.TABLE, 575 } 576 577 PROPERTY_PARSERS = { 578 **parser.Parser.PROPERTY_PARSERS, 579 "NOT DETERMINISTIC": lambda self: self.expression( 580 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 581 ), 582 "OPTIONS": lambda self: self._parse_with_property(), 583 } 584 585 CONSTRAINT_PARSERS = { 586 **parser.Parser.CONSTRAINT_PARSERS, 587 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 588 } 589 590 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 591 RANGE_PARSERS.pop(TokenType.OVERLAPS) 592 593 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 594 595 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 596 597 STATEMENT_PARSERS = { 598 **parser.Parser.STATEMENT_PARSERS, 599 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 600 TokenType.END: lambda self: self._parse_as_command(self._prev), 601 TokenType.FOR: lambda self: self._parse_for_in(), 602 TokenType.EXPORT: lambda self: self._parse_export_data(), 603 } 604 605 BRACKET_OFFSETS = { 606 "OFFSET": (0, False), 607 "ORDINAL": (1, False), 608 "SAFE_OFFSET": (0, True), 609 "SAFE_ORDINAL": (1, True), 610 } 611 612 def _parse_for_in(self) -> exp.ForIn: 613 this = self._parse_range() 614 self._match_text_seq("DO") 615 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 616 617 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 618 this = super()._parse_table_part(schema=schema) or self._parse_number() 619 620 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 621 if isinstance(this, exp.Identifier): 622 table_name = this.name 623 while self._match(TokenType.DASH, advance=False) and self._next: 624 start = self._curr 625 while self._is_connected() and not self._match_set( 626 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 627 ): 628 self._advance() 629 630 table_name += self._find_sql(start, self._prev) 631 632 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 633 elif isinstance(this, exp.Literal): 634 table_name = this.name 635 636 if self._is_connected() and self._parse_var(any_token=True): 637 table_name += self._prev.text 638 639 this = exp.Identifier(this=table_name, quoted=True) 640 641 return this 642 643 def _parse_table_parts( 644 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 645 ) -> exp.Table: 646 table = super()._parse_table_parts( 647 schema=schema, is_db_reference=is_db_reference, wildcard=True 648 ) 649 650 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 651 if not table.catalog: 652 if table.db: 653 parts = table.db.split(".") 654 if len(parts) == 2 and not table.args["db"].quoted: 655 table.set("catalog", exp.Identifier(this=parts[0])) 656 table.set("db", exp.Identifier(this=parts[1])) 657 else: 658 parts = table.name.split(".") 659 if len(parts) == 2 and not table.this.quoted: 660 table.set("db", exp.Identifier(this=parts[0])) 661 table.set("this", exp.Identifier(this=parts[1])) 662 663 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 664 alias = table.this 665 catalog, db, this, *rest = ( 666 exp.to_identifier(p, quoted=True) 667 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 668 ) 669 670 if rest and this: 671 this = exp.Dot.build([this, *rest]) # type: ignore 672 673 table = exp.Table( 674 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 675 ) 676 table.meta["quoted_table"] = True 677 else: 678 alias = None 679 680 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 681 # dataset, so if the project identifier is omitted we need to fix the ast so that 682 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 683 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 684 # views, because it would seem like the "catalog" part is set, when it'd actually 685 # be the region/dataset. Merging the two identifiers into a single one is done to 686 # avoid producing a 4-part Table reference, which would cause issues in the schema 687 # module, when there are 3-part table names mixed with information schema views. 688 # 689 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 690 table_parts = table.parts 691 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 692 # We need to alias the table here to avoid breaking existing qualified columns. 693 # This is expected to be safe, because if there's an actual alias coming up in 694 # the token stream, it will overwrite this one. If there isn't one, we are only 695 # exposing the name that can be used to reference the view explicitly (a no-op). 696 exp.alias_( 697 table, 698 t.cast(exp.Identifier, alias or table_parts[-1]), 699 table=True, 700 copy=False, 701 ) 702 703 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 704 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 705 table.set("db", seq_get(table_parts, -3)) 706 table.set("catalog", seq_get(table_parts, -4)) 707 708 return table 709 710 def _parse_column(self) -> t.Optional[exp.Expression]: 711 column = super()._parse_column() 712 if isinstance(column, exp.Column): 713 parts = column.parts 714 if any("." in p.name for p in parts): 715 catalog, db, table, this, *rest = ( 716 exp.to_identifier(p, quoted=True) 717 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 718 ) 719 720 if rest and this: 721 this = exp.Dot.build([this, *rest]) # type: ignore 722 723 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 724 column.meta["quoted_column"] = True 725 726 return column 727 728 @t.overload 729 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 730 731 @t.overload 732 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 733 734 def _parse_json_object(self, agg=False): 735 json_object = super()._parse_json_object() 736 array_kv_pair = seq_get(json_object.expressions, 0) 737 738 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 739 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 740 if ( 741 array_kv_pair 742 and isinstance(array_kv_pair.this, exp.Array) 743 and isinstance(array_kv_pair.expression, exp.Array) 744 ): 745 keys = array_kv_pair.this.expressions 746 values = array_kv_pair.expression.expressions 747 748 json_object.set( 749 "expressions", 750 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 751 ) 752 753 return json_object 754 755 def _parse_bracket( 756 self, this: t.Optional[exp.Expression] = None 757 ) -> t.Optional[exp.Expression]: 758 bracket = super()._parse_bracket(this) 759 760 if this is bracket: 761 return bracket 762 763 if isinstance(bracket, exp.Bracket): 764 for expression in bracket.expressions: 765 name = expression.name.upper() 766 767 if name not in self.BRACKET_OFFSETS: 768 break 769 770 offset, safe = self.BRACKET_OFFSETS[name] 771 bracket.set("offset", offset) 772 bracket.set("safe", safe) 773 expression.replace(expression.expressions[0]) 774 775 return bracket 776 777 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 778 unnest = super()._parse_unnest(with_alias=with_alias) 779 780 if not unnest: 781 return None 782 783 unnest_expr = seq_get(unnest.expressions, 0) 784 if unnest_expr: 785 from sqlglot.optimizer.annotate_types import annotate_types 786 787 unnest_expr = annotate_types(unnest_expr) 788 789 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 790 # in contrast to other dialects such as DuckDB which flattens only the array by default 791 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 792 array_elem.is_type(exp.DataType.Type.STRUCT) 793 for array_elem in unnest_expr._type.expressions 794 ): 795 unnest.set("explode_array", True) 796 797 return unnest 798 799 def _parse_make_interval(self) -> exp.MakeInterval: 800 expr = exp.MakeInterval() 801 802 for arg_key in expr.arg_types: 803 value = self._parse_lambda() 804 805 if not value: 806 break 807 808 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 809 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 810 if isinstance(value, exp.Kwarg): 811 arg_key = value.this.name 812 813 expr.set(arg_key, value) 814 815 self._match(TokenType.COMMA) 816 817 return expr 818 819 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 820 expr = self.expression( 821 exp.FeaturesAtTime, 822 this=(self._match(TokenType.TABLE) and self._parse_table()) 823 or self._parse_select(nested=True), 824 ) 825 826 while self._match(TokenType.COMMA): 827 arg = self._parse_lambda() 828 829 # Get the LHS of the Kwarg and set the arg to that value, e.g 830 # "num_rows => 1" sets the expr's `num_rows` arg 831 if arg: 832 expr.set(arg.this.name, arg) 833 834 return expr 835 836 def _parse_export_data(self) -> exp.Export: 837 self._match_text_seq("DATA") 838 839 return self.expression( 840 exp.Export, 841 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 842 options=self._parse_properties(), 843 this=self._match_text_seq("AS") and self._parse_select(), 844 ) 845 846 class Generator(generator.Generator): 847 INTERVAL_ALLOWS_PLURAL_FORM = False 848 JOIN_HINTS = False 849 QUERY_HINTS = False 850 TABLE_HINTS = False 851 LIMIT_FETCH = "LIMIT" 852 RENAME_TABLE_WITH_DB = False 853 NVL2_SUPPORTED = False 854 UNNEST_WITH_ORDINALITY = False 855 COLLATE_IS_FUNC = True 856 LIMIT_ONLY_LITERALS = True 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 859 JSON_KEY_VALUE_PAIR_SEP = "," 860 NULL_ORDERING_SUPPORTED = False 861 IGNORE_NULLS_IN_FUNC = True 862 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 863 CAN_IMPLEMENT_ARRAY_ANY = True 864 SUPPORTS_TO_NUMBER = False 865 NAMED_PLACEHOLDER_TOKEN = "@" 866 HEX_FUNC = "TO_HEX" 867 WITH_PROPERTIES_PREFIX = "OPTIONS" 868 SUPPORTS_EXPLODING_PROJECTIONS = False 869 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 870 SUPPORTS_UNIX_SECONDS = True 871 872 TRANSFORMS = { 873 **generator.Generator.TRANSFORMS, 874 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 875 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 876 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 877 exp.Array: inline_array_unless_query, 878 exp.ArrayContains: _array_contains_sql, 879 exp.ArrayFilter: filter_array_using_unnest, 880 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 881 exp.CollateProperty: lambda self, e: ( 882 f"DEFAULT COLLATE {self.sql(e, 'this')}" 883 if e.args.get("default") 884 else f"COLLATE {self.sql(e, 'this')}" 885 ), 886 exp.Commit: lambda *_: "COMMIT TRANSACTION", 887 exp.CountIf: rename_func("COUNTIF"), 888 exp.Create: _create_sql, 889 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 890 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 891 exp.DateDiff: lambda self, e: self.func( 892 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 893 ), 894 exp.DateFromParts: rename_func("DATE"), 895 exp.DateStrToDate: datestrtodate_sql, 896 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 897 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 898 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 899 exp.DateTrunc: lambda self, e: self.func( 900 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 901 ), 902 exp.FromTimeZone: lambda self, e: self.func( 903 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 904 ), 905 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 906 exp.GroupConcat: lambda self, e: groupconcat_sql( 907 self, e, func_name="STRING_AGG", within_group=False 908 ), 909 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 910 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 911 exp.If: if_sql(false_value="NULL"), 912 exp.ILike: no_ilike_sql, 913 exp.IntDiv: rename_func("DIV"), 914 exp.Int64: rename_func("INT64"), 915 exp.JSONExtract: _json_extract_sql, 916 exp.JSONExtractArray: _json_extract_sql, 917 exp.JSONExtractScalar: _json_extract_sql, 918 exp.JSONFormat: rename_func("TO_JSON_STRING"), 919 exp.Levenshtein: _levenshtein_sql, 920 exp.Max: max_or_greatest, 921 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 922 exp.MD5Digest: rename_func("MD5"), 923 exp.Min: min_or_least, 924 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 925 exp.RegexpExtract: lambda self, e: self.func( 926 "REGEXP_EXTRACT", 927 e.this, 928 e.expression, 929 e.args.get("position"), 930 e.args.get("occurrence"), 931 ), 932 exp.RegexpExtractAll: lambda self, e: self.func( 933 "REGEXP_EXTRACT_ALL", e.this, e.expression 934 ), 935 exp.RegexpReplace: regexp_replace_sql, 936 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 937 exp.ReturnsProperty: _returnsproperty_sql, 938 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 939 exp.Select: transforms.preprocess( 940 [ 941 transforms.explode_to_unnest(), 942 transforms.unqualify_unnest, 943 transforms.eliminate_distinct_on, 944 _alias_ordered_group, 945 transforms.eliminate_semi_and_anti_joins, 946 ] 947 ), 948 exp.SHA: rename_func("SHA1"), 949 exp.SHA2: sha256_sql, 950 exp.StabilityProperty: lambda self, e: ( 951 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 952 ), 953 exp.String: rename_func("STRING"), 954 exp.StrPosition: lambda self, e: ( 955 strposition_sql( 956 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 957 ) 958 ), 959 exp.StrToDate: _str_to_datetime_sql, 960 exp.StrToTime: _str_to_datetime_sql, 961 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 962 exp.TimeFromParts: rename_func("TIME"), 963 exp.TimestampFromParts: rename_func("DATETIME"), 964 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 965 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 966 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 967 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 968 exp.TimeStrToTime: timestrtotime_sql, 969 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 970 exp.TsOrDsAdd: _ts_or_ds_add_sql, 971 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 972 exp.TsOrDsToTime: rename_func("TIME"), 973 exp.TsOrDsToDatetime: rename_func("DATETIME"), 974 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 975 exp.Unhex: rename_func("FROM_HEX"), 976 exp.UnixDate: rename_func("UNIX_DATE"), 977 exp.UnixToTime: _unix_to_time_sql, 978 exp.Uuid: lambda *_: "GENERATE_UUID()", 979 exp.Values: _derived_table_values_to_unnest, 980 exp.VariancePop: rename_func("VAR_POP"), 981 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 982 } 983 984 SUPPORTED_JSON_PATH_PARTS = { 985 exp.JSONPathKey, 986 exp.JSONPathRoot, 987 exp.JSONPathSubscript, 988 } 989 990 TYPE_MAPPING = { 991 **generator.Generator.TYPE_MAPPING, 992 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 993 exp.DataType.Type.BIGINT: "INT64", 994 exp.DataType.Type.BINARY: "BYTES", 995 exp.DataType.Type.BLOB: "BYTES", 996 exp.DataType.Type.BOOLEAN: "BOOL", 997 exp.DataType.Type.CHAR: "STRING", 998 exp.DataType.Type.DECIMAL: "NUMERIC", 999 exp.DataType.Type.DOUBLE: "FLOAT64", 1000 exp.DataType.Type.FLOAT: "FLOAT64", 1001 exp.DataType.Type.INT: "INT64", 1002 exp.DataType.Type.NCHAR: "STRING", 1003 exp.DataType.Type.NVARCHAR: "STRING", 1004 exp.DataType.Type.SMALLINT: "INT64", 1005 exp.DataType.Type.TEXT: "STRING", 1006 exp.DataType.Type.TIMESTAMP: "DATETIME", 1007 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1008 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1009 exp.DataType.Type.TINYINT: "INT64", 1010 exp.DataType.Type.ROWVERSION: "BYTES", 1011 exp.DataType.Type.UUID: "STRING", 1012 exp.DataType.Type.VARBINARY: "BYTES", 1013 exp.DataType.Type.VARCHAR: "STRING", 1014 exp.DataType.Type.VARIANT: "ANY TYPE", 1015 } 1016 1017 PROPERTIES_LOCATION = { 1018 **generator.Generator.PROPERTIES_LOCATION, 1019 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1020 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1021 } 1022 1023 # WINDOW comes after QUALIFY 1024 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1025 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1026 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1027 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1028 } 1029 1030 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1031 RESERVED_KEYWORDS = { 1032 "all", 1033 "and", 1034 "any", 1035 "array", 1036 "as", 1037 "asc", 1038 "assert_rows_modified", 1039 "at", 1040 "between", 1041 "by", 1042 "case", 1043 "cast", 1044 "collate", 1045 "contains", 1046 "create", 1047 "cross", 1048 "cube", 1049 "current", 1050 "default", 1051 "define", 1052 "desc", 1053 "distinct", 1054 "else", 1055 "end", 1056 "enum", 1057 "escape", 1058 "except", 1059 "exclude", 1060 "exists", 1061 "extract", 1062 "false", 1063 "fetch", 1064 "following", 1065 "for", 1066 "from", 1067 "full", 1068 "group", 1069 "grouping", 1070 "groups", 1071 "hash", 1072 "having", 1073 "if", 1074 "ignore", 1075 "in", 1076 "inner", 1077 "intersect", 1078 "interval", 1079 "into", 1080 "is", 1081 "join", 1082 "lateral", 1083 "left", 1084 "like", 1085 "limit", 1086 "lookup", 1087 "merge", 1088 "natural", 1089 "new", 1090 "no", 1091 "not", 1092 "null", 1093 "nulls", 1094 "of", 1095 "on", 1096 "or", 1097 "order", 1098 "outer", 1099 "over", 1100 "partition", 1101 "preceding", 1102 "proto", 1103 "qualify", 1104 "range", 1105 "recursive", 1106 "respect", 1107 "right", 1108 "rollup", 1109 "rows", 1110 "select", 1111 "set", 1112 "some", 1113 "struct", 1114 "tablesample", 1115 "then", 1116 "to", 1117 "treat", 1118 "true", 1119 "unbounded", 1120 "union", 1121 "unnest", 1122 "using", 1123 "when", 1124 "where", 1125 "window", 1126 "with", 1127 "within", 1128 } 1129 1130 def mod_sql(self, expression: exp.Mod) -> str: 1131 this = expression.this 1132 expr = expression.expression 1133 return self.func( 1134 "MOD", 1135 this.unnest() if isinstance(this, exp.Paren) else this, 1136 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1137 ) 1138 1139 def column_parts(self, expression: exp.Column) -> str: 1140 if expression.meta.get("quoted_column"): 1141 # If a column reference is of the form `dataset.table`.name, we need 1142 # to preserve the quoted table path, otherwise the reference breaks 1143 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1144 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1145 return f"{table_path}.{self.sql(expression, 'this')}" 1146 1147 return super().column_parts(expression) 1148 1149 def table_parts(self, expression: exp.Table) -> str: 1150 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1151 # we need to make sure the correct quoting is used in each case. 1152 # 1153 # For example, if there is a CTE x that clashes with a schema name, then the former will 1154 # return the table y in that schema, whereas the latter will return the CTE's y column: 1155 # 1156 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1158 if expression.meta.get("quoted_table"): 1159 table_parts = ".".join(p.name for p in expression.parts) 1160 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1161 1162 return super().table_parts(expression) 1163 1164 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1165 this = expression.this 1166 if isinstance(this, exp.TsOrDsToDatetime): 1167 func_name = "FORMAT_DATETIME" 1168 elif isinstance(this, exp.TsOrDsToTimestamp): 1169 func_name = "FORMAT_TIMESTAMP" 1170 else: 1171 func_name = "FORMAT_DATE" 1172 1173 time_expr = ( 1174 this 1175 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1176 else expression 1177 ) 1178 return self.func(func_name, self.format_time(expression), time_expr.this) 1179 1180 def eq_sql(self, expression: exp.EQ) -> str: 1181 # Operands of = cannot be NULL in BigQuery 1182 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1183 if not isinstance(expression.parent, exp.Update): 1184 return "NULL" 1185 1186 return self.binary(expression, "=") 1187 1188 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1189 parent = expression.parent 1190 1191 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1192 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1193 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1194 return self.func( 1195 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1196 ) 1197 1198 return super().attimezone_sql(expression) 1199 1200 def trycast_sql(self, expression: exp.TryCast) -> str: 1201 return self.cast_sql(expression, safe_prefix="SAFE_") 1202 1203 def bracket_sql(self, expression: exp.Bracket) -> str: 1204 this = expression.this 1205 expressions = expression.expressions 1206 1207 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1208 arg = expressions[0] 1209 if arg.type is None: 1210 from sqlglot.optimizer.annotate_types import annotate_types 1211 1212 arg = annotate_types(arg) 1213 1214 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1215 # BQ doesn't support bracket syntax with string values for structs 1216 return f"{self.sql(this)}.{arg.name}" 1217 1218 expressions_sql = self.expressions(expression, flat=True) 1219 offset = expression.args.get("offset") 1220 1221 if offset == 0: 1222 expressions_sql = f"OFFSET({expressions_sql})" 1223 elif offset == 1: 1224 expressions_sql = f"ORDINAL({expressions_sql})" 1225 elif offset is not None: 1226 self.unsupported(f"Unsupported array offset: {offset}") 1227 1228 if expression.args.get("safe"): 1229 expressions_sql = f"SAFE_{expressions_sql}" 1230 1231 return f"{self.sql(this)}[{expressions_sql}]" 1232 1233 def in_unnest_op(self, expression: exp.Unnest) -> str: 1234 return self.sql(expression) 1235 1236 def version_sql(self, expression: exp.Version) -> str: 1237 if expression.name == "TIMESTAMP": 1238 expression.set("this", "SYSTEM_TIME") 1239 return super().version_sql(expression) 1240 1241 def contains_sql(self, expression: exp.Contains) -> str: 1242 this = expression.this 1243 expr = expression.expression 1244 1245 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1246 this = this.this 1247 expr = expr.this 1248 1249 return self.func("CONTAINS_SUBSTR", this, expr) 1250 1251 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1252 this = expression.this 1253 1254 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1255 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1256 # because they aren't literals and so the above syntax is invalid BigQuery. 1257 if isinstance(this, exp.Array): 1258 elem = seq_get(this.expressions, 0) 1259 if not (elem and elem.find(exp.Query)): 1260 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1261 1262 return super().cast_sql(expression, safe_prefix=safe_prefix)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime
formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy')
.
If empty, the corresponding trie will be constructed off of TIME_MAPPING
.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT *
queries.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
424 def normalize_identifier(self, expression: E) -> E: 425 if ( 426 isinstance(expression, exp.Identifier) 427 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 428 ): 429 parent = expression.parent 430 while isinstance(parent, exp.Dot): 431 parent = parent.parent 432 433 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 434 # by default. The following check uses a heuristic to detect tables based on whether 435 # they are qualified. This should generally be correct, because tables in BigQuery 436 # must be qualified with at least a dataset, unless @@dataset_id is set. 437 case_sensitive = ( 438 isinstance(parent, exp.UserDefinedFunction) 439 or ( 440 isinstance(parent, exp.Table) 441 and parent.db 442 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 443 ) 444 or expression.meta.get("is_table") 445 ) 446 if not case_sensitive: 447 expression.set("this", expression.this.lower()) 448 449 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO
would be resolved as foo
in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO
. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
451 class Tokenizer(tokens.Tokenizer): 452 QUOTES = ["'", '"', '"""', "'''"] 453 COMMENTS = ["--", "#", ("/*", "*/")] 454 IDENTIFIERS = ["`"] 455 STRING_ESCAPES = ["\\"] 456 457 HEX_STRINGS = [("0x", ""), ("0X", "")] 458 459 BYTE_STRINGS = [ 460 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 461 ] 462 463 RAW_STRINGS = [ 464 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 465 ] 466 467 KEYWORDS = { 468 **tokens.Tokenizer.KEYWORDS, 469 "ANY TYPE": TokenType.VARIANT, 470 "BEGIN": TokenType.COMMAND, 471 "BEGIN TRANSACTION": TokenType.BEGIN, 472 "BYTEINT": TokenType.INT, 473 "BYTES": TokenType.BINARY, 474 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 475 "DATETIME": TokenType.TIMESTAMP, 476 "DECLARE": TokenType.COMMAND, 477 "ELSEIF": TokenType.COMMAND, 478 "EXCEPTION": TokenType.COMMAND, 479 "EXPORT": TokenType.EXPORT, 480 "FLOAT64": TokenType.DOUBLE, 481 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 482 "MODEL": TokenType.MODEL, 483 "NOT DETERMINISTIC": TokenType.VOLATILE, 484 "RECORD": TokenType.STRUCT, 485 "TIMESTAMP": TokenType.TIMESTAMPTZ, 486 } 487 KEYWORDS.pop("DIV") 488 KEYWORDS.pop("VALUES") 489 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
491 class Parser(parser.Parser): 492 PREFIXED_PIVOT_COLUMNS = True 493 LOG_DEFAULTS_TO_LN = True 494 SUPPORTS_IMPLICIT_UNNEST = True 495 496 FUNCTIONS = { 497 **parser.Parser.FUNCTIONS, 498 "CONTAINS_SUBSTR": _build_contains_substring, 499 "DATE": _build_date, 500 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 501 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 502 "DATE_TRUNC": lambda args: exp.DateTrunc( 503 unit=exp.Literal.string(str(seq_get(args, 1))), 504 this=seq_get(args, 0), 505 zone=seq_get(args, 2), 506 ), 507 "DATETIME": _build_datetime, 508 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 509 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 510 "DIV": binary_from_function(exp.IntDiv), 511 "EDIT_DISTANCE": _build_levenshtein, 512 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 513 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 514 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 515 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 516 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 517 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 518 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 519 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 520 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 521 "MD5": exp.MD5Digest.from_arg_list, 522 "TO_HEX": _build_to_hex, 523 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 524 [seq_get(args, 1), seq_get(args, 0)] 525 ), 526 "PARSE_TIMESTAMP": _build_parse_timestamp, 527 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 528 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 529 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 530 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 531 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 532 ), 533 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 534 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 535 "SPLIT": lambda args: exp.Split( 536 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 537 this=seq_get(args, 0), 538 expression=seq_get(args, 1) or exp.Literal.string(","), 539 ), 540 "STRPOS": exp.StrPosition.from_arg_list, 541 "TIME": _build_time, 542 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 543 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 544 "TIMESTAMP": _build_timestamp, 545 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 546 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 547 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 548 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 549 ), 550 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 551 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 552 ), 553 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 554 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 555 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 556 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 557 } 558 559 FUNCTION_PARSERS = { 560 **parser.Parser.FUNCTION_PARSERS, 561 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 562 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 563 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 564 } 565 FUNCTION_PARSERS.pop("TRIM") 566 567 NO_PAREN_FUNCTIONS = { 568 **parser.Parser.NO_PAREN_FUNCTIONS, 569 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 570 } 571 572 NESTED_TYPE_TOKENS = { 573 *parser.Parser.NESTED_TYPE_TOKENS, 574 TokenType.TABLE, 575 } 576 577 PROPERTY_PARSERS = { 578 **parser.Parser.PROPERTY_PARSERS, 579 "NOT DETERMINISTIC": lambda self: self.expression( 580 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 581 ), 582 "OPTIONS": lambda self: self._parse_with_property(), 583 } 584 585 CONSTRAINT_PARSERS = { 586 **parser.Parser.CONSTRAINT_PARSERS, 587 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 588 } 589 590 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 591 RANGE_PARSERS.pop(TokenType.OVERLAPS) 592 593 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 594 595 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 596 597 STATEMENT_PARSERS = { 598 **parser.Parser.STATEMENT_PARSERS, 599 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 600 TokenType.END: lambda self: self._parse_as_command(self._prev), 601 TokenType.FOR: lambda self: self._parse_for_in(), 602 TokenType.EXPORT: lambda self: self._parse_export_data(), 603 } 604 605 BRACKET_OFFSETS = { 606 "OFFSET": (0, False), 607 "ORDINAL": (1, False), 608 "SAFE_OFFSET": (0, True), 609 "SAFE_ORDINAL": (1, True), 610 } 611 612 def _parse_for_in(self) -> exp.ForIn: 613 this = self._parse_range() 614 self._match_text_seq("DO") 615 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 616 617 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 618 this = super()._parse_table_part(schema=schema) or self._parse_number() 619 620 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 621 if isinstance(this, exp.Identifier): 622 table_name = this.name 623 while self._match(TokenType.DASH, advance=False) and self._next: 624 start = self._curr 625 while self._is_connected() and not self._match_set( 626 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 627 ): 628 self._advance() 629 630 table_name += self._find_sql(start, self._prev) 631 632 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 633 elif isinstance(this, exp.Literal): 634 table_name = this.name 635 636 if self._is_connected() and self._parse_var(any_token=True): 637 table_name += self._prev.text 638 639 this = exp.Identifier(this=table_name, quoted=True) 640 641 return this 642 643 def _parse_table_parts( 644 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 645 ) -> exp.Table: 646 table = super()._parse_table_parts( 647 schema=schema, is_db_reference=is_db_reference, wildcard=True 648 ) 649 650 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 651 if not table.catalog: 652 if table.db: 653 parts = table.db.split(".") 654 if len(parts) == 2 and not table.args["db"].quoted: 655 table.set("catalog", exp.Identifier(this=parts[0])) 656 table.set("db", exp.Identifier(this=parts[1])) 657 else: 658 parts = table.name.split(".") 659 if len(parts) == 2 and not table.this.quoted: 660 table.set("db", exp.Identifier(this=parts[0])) 661 table.set("this", exp.Identifier(this=parts[1])) 662 663 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 664 alias = table.this 665 catalog, db, this, *rest = ( 666 exp.to_identifier(p, quoted=True) 667 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 668 ) 669 670 if rest and this: 671 this = exp.Dot.build([this, *rest]) # type: ignore 672 673 table = exp.Table( 674 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 675 ) 676 table.meta["quoted_table"] = True 677 else: 678 alias = None 679 680 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 681 # dataset, so if the project identifier is omitted we need to fix the ast so that 682 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 683 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 684 # views, because it would seem like the "catalog" part is set, when it'd actually 685 # be the region/dataset. Merging the two identifiers into a single one is done to 686 # avoid producing a 4-part Table reference, which would cause issues in the schema 687 # module, when there are 3-part table names mixed with information schema views. 688 # 689 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 690 table_parts = table.parts 691 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 692 # We need to alias the table here to avoid breaking existing qualified columns. 693 # This is expected to be safe, because if there's an actual alias coming up in 694 # the token stream, it will overwrite this one. If there isn't one, we are only 695 # exposing the name that can be used to reference the view explicitly (a no-op). 696 exp.alias_( 697 table, 698 t.cast(exp.Identifier, alias or table_parts[-1]), 699 table=True, 700 copy=False, 701 ) 702 703 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 704 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 705 table.set("db", seq_get(table_parts, -3)) 706 table.set("catalog", seq_get(table_parts, -4)) 707 708 return table 709 710 def _parse_column(self) -> t.Optional[exp.Expression]: 711 column = super()._parse_column() 712 if isinstance(column, exp.Column): 713 parts = column.parts 714 if any("." in p.name for p in parts): 715 catalog, db, table, this, *rest = ( 716 exp.to_identifier(p, quoted=True) 717 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 718 ) 719 720 if rest and this: 721 this = exp.Dot.build([this, *rest]) # type: ignore 722 723 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 724 column.meta["quoted_column"] = True 725 726 return column 727 728 @t.overload 729 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 730 731 @t.overload 732 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 733 734 def _parse_json_object(self, agg=False): 735 json_object = super()._parse_json_object() 736 array_kv_pair = seq_get(json_object.expressions, 0) 737 738 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 739 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 740 if ( 741 array_kv_pair 742 and isinstance(array_kv_pair.this, exp.Array) 743 and isinstance(array_kv_pair.expression, exp.Array) 744 ): 745 keys = array_kv_pair.this.expressions 746 values = array_kv_pair.expression.expressions 747 748 json_object.set( 749 "expressions", 750 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 751 ) 752 753 return json_object 754 755 def _parse_bracket( 756 self, this: t.Optional[exp.Expression] = None 757 ) -> t.Optional[exp.Expression]: 758 bracket = super()._parse_bracket(this) 759 760 if this is bracket: 761 return bracket 762 763 if isinstance(bracket, exp.Bracket): 764 for expression in bracket.expressions: 765 name = expression.name.upper() 766 767 if name not in self.BRACKET_OFFSETS: 768 break 769 770 offset, safe = self.BRACKET_OFFSETS[name] 771 bracket.set("offset", offset) 772 bracket.set("safe", safe) 773 expression.replace(expression.expressions[0]) 774 775 return bracket 776 777 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 778 unnest = super()._parse_unnest(with_alias=with_alias) 779 780 if not unnest: 781 return None 782 783 unnest_expr = seq_get(unnest.expressions, 0) 784 if unnest_expr: 785 from sqlglot.optimizer.annotate_types import annotate_types 786 787 unnest_expr = annotate_types(unnest_expr) 788 789 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 790 # in contrast to other dialects such as DuckDB which flattens only the array by default 791 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 792 array_elem.is_type(exp.DataType.Type.STRUCT) 793 for array_elem in unnest_expr._type.expressions 794 ): 795 unnest.set("explode_array", True) 796 797 return unnest 798 799 def _parse_make_interval(self) -> exp.MakeInterval: 800 expr = exp.MakeInterval() 801 802 for arg_key in expr.arg_types: 803 value = self._parse_lambda() 804 805 if not value: 806 break 807 808 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 809 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 810 if isinstance(value, exp.Kwarg): 811 arg_key = value.this.name 812 813 expr.set(arg_key, value) 814 815 self._match(TokenType.COMMA) 816 817 return expr 818 819 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 820 expr = self.expression( 821 exp.FeaturesAtTime, 822 this=(self._match(TokenType.TABLE) and self._parse_table()) 823 or self._parse_select(nested=True), 824 ) 825 826 while self._match(TokenType.COMMA): 827 arg = self._parse_lambda() 828 829 # Get the LHS of the Kwarg and set the arg to that value, e.g 830 # "num_rows => 1" sets the expr's `num_rows` arg 831 if arg: 832 expr.set(arg.this.name, arg) 833 834 return expr 835 836 def _parse_export_data(self) -> exp.Export: 837 self._match_text_seq("DATA") 838 839 return self.expression( 840 exp.Export, 841 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 842 options=self._parse_properties(), 843 this=self._match_text_seq("AS") and self._parse_select(), 844 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
846 class Generator(generator.Generator): 847 INTERVAL_ALLOWS_PLURAL_FORM = False 848 JOIN_HINTS = False 849 QUERY_HINTS = False 850 TABLE_HINTS = False 851 LIMIT_FETCH = "LIMIT" 852 RENAME_TABLE_WITH_DB = False 853 NVL2_SUPPORTED = False 854 UNNEST_WITH_ORDINALITY = False 855 COLLATE_IS_FUNC = True 856 LIMIT_ONLY_LITERALS = True 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 859 JSON_KEY_VALUE_PAIR_SEP = "," 860 NULL_ORDERING_SUPPORTED = False 861 IGNORE_NULLS_IN_FUNC = True 862 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 863 CAN_IMPLEMENT_ARRAY_ANY = True 864 SUPPORTS_TO_NUMBER = False 865 NAMED_PLACEHOLDER_TOKEN = "@" 866 HEX_FUNC = "TO_HEX" 867 WITH_PROPERTIES_PREFIX = "OPTIONS" 868 SUPPORTS_EXPLODING_PROJECTIONS = False 869 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 870 SUPPORTS_UNIX_SECONDS = True 871 872 TRANSFORMS = { 873 **generator.Generator.TRANSFORMS, 874 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 875 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 876 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 877 exp.Array: inline_array_unless_query, 878 exp.ArrayContains: _array_contains_sql, 879 exp.ArrayFilter: filter_array_using_unnest, 880 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 881 exp.CollateProperty: lambda self, e: ( 882 f"DEFAULT COLLATE {self.sql(e, 'this')}" 883 if e.args.get("default") 884 else f"COLLATE {self.sql(e, 'this')}" 885 ), 886 exp.Commit: lambda *_: "COMMIT TRANSACTION", 887 exp.CountIf: rename_func("COUNTIF"), 888 exp.Create: _create_sql, 889 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 890 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 891 exp.DateDiff: lambda self, e: self.func( 892 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 893 ), 894 exp.DateFromParts: rename_func("DATE"), 895 exp.DateStrToDate: datestrtodate_sql, 896 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 897 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 898 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 899 exp.DateTrunc: lambda self, e: self.func( 900 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 901 ), 902 exp.FromTimeZone: lambda self, e: self.func( 903 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 904 ), 905 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 906 exp.GroupConcat: lambda self, e: groupconcat_sql( 907 self, e, func_name="STRING_AGG", within_group=False 908 ), 909 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 910 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 911 exp.If: if_sql(false_value="NULL"), 912 exp.ILike: no_ilike_sql, 913 exp.IntDiv: rename_func("DIV"), 914 exp.Int64: rename_func("INT64"), 915 exp.JSONExtract: _json_extract_sql, 916 exp.JSONExtractArray: _json_extract_sql, 917 exp.JSONExtractScalar: _json_extract_sql, 918 exp.JSONFormat: rename_func("TO_JSON_STRING"), 919 exp.Levenshtein: _levenshtein_sql, 920 exp.Max: max_or_greatest, 921 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 922 exp.MD5Digest: rename_func("MD5"), 923 exp.Min: min_or_least, 924 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 925 exp.RegexpExtract: lambda self, e: self.func( 926 "REGEXP_EXTRACT", 927 e.this, 928 e.expression, 929 e.args.get("position"), 930 e.args.get("occurrence"), 931 ), 932 exp.RegexpExtractAll: lambda self, e: self.func( 933 "REGEXP_EXTRACT_ALL", e.this, e.expression 934 ), 935 exp.RegexpReplace: regexp_replace_sql, 936 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 937 exp.ReturnsProperty: _returnsproperty_sql, 938 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 939 exp.Select: transforms.preprocess( 940 [ 941 transforms.explode_to_unnest(), 942 transforms.unqualify_unnest, 943 transforms.eliminate_distinct_on, 944 _alias_ordered_group, 945 transforms.eliminate_semi_and_anti_joins, 946 ] 947 ), 948 exp.SHA: rename_func("SHA1"), 949 exp.SHA2: sha256_sql, 950 exp.StabilityProperty: lambda self, e: ( 951 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 952 ), 953 exp.String: rename_func("STRING"), 954 exp.StrPosition: lambda self, e: ( 955 strposition_sql( 956 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 957 ) 958 ), 959 exp.StrToDate: _str_to_datetime_sql, 960 exp.StrToTime: _str_to_datetime_sql, 961 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 962 exp.TimeFromParts: rename_func("TIME"), 963 exp.TimestampFromParts: rename_func("DATETIME"), 964 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 965 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 966 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 967 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 968 exp.TimeStrToTime: timestrtotime_sql, 969 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 970 exp.TsOrDsAdd: _ts_or_ds_add_sql, 971 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 972 exp.TsOrDsToTime: rename_func("TIME"), 973 exp.TsOrDsToDatetime: rename_func("DATETIME"), 974 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 975 exp.Unhex: rename_func("FROM_HEX"), 976 exp.UnixDate: rename_func("UNIX_DATE"), 977 exp.UnixToTime: _unix_to_time_sql, 978 exp.Uuid: lambda *_: "GENERATE_UUID()", 979 exp.Values: _derived_table_values_to_unnest, 980 exp.VariancePop: rename_func("VAR_POP"), 981 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 982 } 983 984 SUPPORTED_JSON_PATH_PARTS = { 985 exp.JSONPathKey, 986 exp.JSONPathRoot, 987 exp.JSONPathSubscript, 988 } 989 990 TYPE_MAPPING = { 991 **generator.Generator.TYPE_MAPPING, 992 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 993 exp.DataType.Type.BIGINT: "INT64", 994 exp.DataType.Type.BINARY: "BYTES", 995 exp.DataType.Type.BLOB: "BYTES", 996 exp.DataType.Type.BOOLEAN: "BOOL", 997 exp.DataType.Type.CHAR: "STRING", 998 exp.DataType.Type.DECIMAL: "NUMERIC", 999 exp.DataType.Type.DOUBLE: "FLOAT64", 1000 exp.DataType.Type.FLOAT: "FLOAT64", 1001 exp.DataType.Type.INT: "INT64", 1002 exp.DataType.Type.NCHAR: "STRING", 1003 exp.DataType.Type.NVARCHAR: "STRING", 1004 exp.DataType.Type.SMALLINT: "INT64", 1005 exp.DataType.Type.TEXT: "STRING", 1006 exp.DataType.Type.TIMESTAMP: "DATETIME", 1007 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1008 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1009 exp.DataType.Type.TINYINT: "INT64", 1010 exp.DataType.Type.ROWVERSION: "BYTES", 1011 exp.DataType.Type.UUID: "STRING", 1012 exp.DataType.Type.VARBINARY: "BYTES", 1013 exp.DataType.Type.VARCHAR: "STRING", 1014 exp.DataType.Type.VARIANT: "ANY TYPE", 1015 } 1016 1017 PROPERTIES_LOCATION = { 1018 **generator.Generator.PROPERTIES_LOCATION, 1019 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1020 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1021 } 1022 1023 # WINDOW comes after QUALIFY 1024 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1025 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1026 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1027 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1028 } 1029 1030 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1031 RESERVED_KEYWORDS = { 1032 "all", 1033 "and", 1034 "any", 1035 "array", 1036 "as", 1037 "asc", 1038 "assert_rows_modified", 1039 "at", 1040 "between", 1041 "by", 1042 "case", 1043 "cast", 1044 "collate", 1045 "contains", 1046 "create", 1047 "cross", 1048 "cube", 1049 "current", 1050 "default", 1051 "define", 1052 "desc", 1053 "distinct", 1054 "else", 1055 "end", 1056 "enum", 1057 "escape", 1058 "except", 1059 "exclude", 1060 "exists", 1061 "extract", 1062 "false", 1063 "fetch", 1064 "following", 1065 "for", 1066 "from", 1067 "full", 1068 "group", 1069 "grouping", 1070 "groups", 1071 "hash", 1072 "having", 1073 "if", 1074 "ignore", 1075 "in", 1076 "inner", 1077 "intersect", 1078 "interval", 1079 "into", 1080 "is", 1081 "join", 1082 "lateral", 1083 "left", 1084 "like", 1085 "limit", 1086 "lookup", 1087 "merge", 1088 "natural", 1089 "new", 1090 "no", 1091 "not", 1092 "null", 1093 "nulls", 1094 "of", 1095 "on", 1096 "or", 1097 "order", 1098 "outer", 1099 "over", 1100 "partition", 1101 "preceding", 1102 "proto", 1103 "qualify", 1104 "range", 1105 "recursive", 1106 "respect", 1107 "right", 1108 "rollup", 1109 "rows", 1110 "select", 1111 "set", 1112 "some", 1113 "struct", 1114 "tablesample", 1115 "then", 1116 "to", 1117 "treat", 1118 "true", 1119 "unbounded", 1120 "union", 1121 "unnest", 1122 "using", 1123 "when", 1124 "where", 1125 "window", 1126 "with", 1127 "within", 1128 } 1129 1130 def mod_sql(self, expression: exp.Mod) -> str: 1131 this = expression.this 1132 expr = expression.expression 1133 return self.func( 1134 "MOD", 1135 this.unnest() if isinstance(this, exp.Paren) else this, 1136 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1137 ) 1138 1139 def column_parts(self, expression: exp.Column) -> str: 1140 if expression.meta.get("quoted_column"): 1141 # If a column reference is of the form `dataset.table`.name, we need 1142 # to preserve the quoted table path, otherwise the reference breaks 1143 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1144 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1145 return f"{table_path}.{self.sql(expression, 'this')}" 1146 1147 return super().column_parts(expression) 1148 1149 def table_parts(self, expression: exp.Table) -> str: 1150 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1151 # we need to make sure the correct quoting is used in each case. 1152 # 1153 # For example, if there is a CTE x that clashes with a schema name, then the former will 1154 # return the table y in that schema, whereas the latter will return the CTE's y column: 1155 # 1156 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1158 if expression.meta.get("quoted_table"): 1159 table_parts = ".".join(p.name for p in expression.parts) 1160 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1161 1162 return super().table_parts(expression) 1163 1164 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1165 this = expression.this 1166 if isinstance(this, exp.TsOrDsToDatetime): 1167 func_name = "FORMAT_DATETIME" 1168 elif isinstance(this, exp.TsOrDsToTimestamp): 1169 func_name = "FORMAT_TIMESTAMP" 1170 else: 1171 func_name = "FORMAT_DATE" 1172 1173 time_expr = ( 1174 this 1175 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1176 else expression 1177 ) 1178 return self.func(func_name, self.format_time(expression), time_expr.this) 1179 1180 def eq_sql(self, expression: exp.EQ) -> str: 1181 # Operands of = cannot be NULL in BigQuery 1182 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1183 if not isinstance(expression.parent, exp.Update): 1184 return "NULL" 1185 1186 return self.binary(expression, "=") 1187 1188 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1189 parent = expression.parent 1190 1191 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1192 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1193 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1194 return self.func( 1195 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1196 ) 1197 1198 return super().attimezone_sql(expression) 1199 1200 def trycast_sql(self, expression: exp.TryCast) -> str: 1201 return self.cast_sql(expression, safe_prefix="SAFE_") 1202 1203 def bracket_sql(self, expression: exp.Bracket) -> str: 1204 this = expression.this 1205 expressions = expression.expressions 1206 1207 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1208 arg = expressions[0] 1209 if arg.type is None: 1210 from sqlglot.optimizer.annotate_types import annotate_types 1211 1212 arg = annotate_types(arg) 1213 1214 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1215 # BQ doesn't support bracket syntax with string values for structs 1216 return f"{self.sql(this)}.{arg.name}" 1217 1218 expressions_sql = self.expressions(expression, flat=True) 1219 offset = expression.args.get("offset") 1220 1221 if offset == 0: 1222 expressions_sql = f"OFFSET({expressions_sql})" 1223 elif offset == 1: 1224 expressions_sql = f"ORDINAL({expressions_sql})" 1225 elif offset is not None: 1226 self.unsupported(f"Unsupported array offset: {offset}") 1227 1228 if expression.args.get("safe"): 1229 expressions_sql = f"SAFE_{expressions_sql}" 1230 1231 return f"{self.sql(this)}[{expressions_sql}]" 1232 1233 def in_unnest_op(self, expression: exp.Unnest) -> str: 1234 return self.sql(expression) 1235 1236 def version_sql(self, expression: exp.Version) -> str: 1237 if expression.name == "TIMESTAMP": 1238 expression.set("this", "SYSTEM_TIME") 1239 return super().version_sql(expression) 1240 1241 def contains_sql(self, expression: exp.Contains) -> str: 1242 this = expression.this 1243 expr = expression.expression 1244 1245 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1246 this = this.this 1247 expr = expr.this 1248 1249 return self.func("CONTAINS_SUBSTR", this, expr) 1250 1251 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1252 this = expression.this 1253 1254 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1255 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1256 # because they aren't literals and so the above syntax is invalid BigQuery. 1257 if isinstance(this, exp.Array): 1258 elem = seq_get(this.expressions, 0) 1259 if not (elem and elem.find(exp.Query)): 1260 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1261 1262 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1139 def column_parts(self, expression: exp.Column) -> str: 1140 if expression.meta.get("quoted_column"): 1141 # If a column reference is of the form `dataset.table`.name, we need 1142 # to preserve the quoted table path, otherwise the reference breaks 1143 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1144 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1145 return f"{table_path}.{self.sql(expression, 'this')}" 1146 1147 return super().column_parts(expression)
1149 def table_parts(self, expression: exp.Table) -> str: 1150 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1151 # we need to make sure the correct quoting is used in each case. 1152 # 1153 # For example, if there is a CTE x that clashes with a schema name, then the former will 1154 # return the table y in that schema, whereas the latter will return the CTE's y column: 1155 # 1156 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1158 if expression.meta.get("quoted_table"): 1159 table_parts = ".".join(p.name for p in expression.parts) 1160 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1161 1162 return super().table_parts(expression)
1164 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1165 this = expression.this 1166 if isinstance(this, exp.TsOrDsToDatetime): 1167 func_name = "FORMAT_DATETIME" 1168 elif isinstance(this, exp.TsOrDsToTimestamp): 1169 func_name = "FORMAT_TIMESTAMP" 1170 else: 1171 func_name = "FORMAT_DATE" 1172 1173 time_expr = ( 1174 this 1175 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1176 else expression 1177 ) 1178 return self.func(func_name, self.format_time(expression), time_expr.this)
1180 def eq_sql(self, expression: exp.EQ) -> str: 1181 # Operands of = cannot be NULL in BigQuery 1182 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1183 if not isinstance(expression.parent, exp.Update): 1184 return "NULL" 1185 1186 return self.binary(expression, "=")
1188 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1189 parent = expression.parent 1190 1191 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1192 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1193 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1194 return self.func( 1195 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1196 ) 1197 1198 return super().attimezone_sql(expression)
1203 def bracket_sql(self, expression: exp.Bracket) -> str: 1204 this = expression.this 1205 expressions = expression.expressions 1206 1207 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1208 arg = expressions[0] 1209 if arg.type is None: 1210 from sqlglot.optimizer.annotate_types import annotate_types 1211 1212 arg = annotate_types(arg) 1213 1214 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1215 # BQ doesn't support bracket syntax with string values for structs 1216 return f"{self.sql(this)}.{arg.name}" 1217 1218 expressions_sql = self.expressions(expression, flat=True) 1219 offset = expression.args.get("offset") 1220 1221 if offset == 0: 1222 expressions_sql = f"OFFSET({expressions_sql})" 1223 elif offset == 1: 1224 expressions_sql = f"ORDINAL({expressions_sql})" 1225 elif offset is not None: 1226 self.unsupported(f"Unsupported array offset: {offset}") 1227 1228 if expression.args.get("safe"): 1229 expressions_sql = f"SAFE_{expressions_sql}" 1230 1231 return f"{self.sql(this)}[{expressions_sql}]"
1251 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1252 this = expression.this 1253 1254 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1255 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1256 # because they aren't literals and so the above syntax is invalid BigQuery. 1257 if isinstance(this, exp.Array): 1258 elem = seq_get(this.expressions, 0) 1259 if not (elem and elem.find(exp.Query)): 1260 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1261 1262 return super().cast_sql(expression, safe_prefix=safe_prefix)
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql