sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    str_position_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression
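A quick sketch of what the two rewrites above aim to produce (assuming sqlglot is installed; the expected outputs are shown as comments and may vary slightly across versions):

    import sqlglot

    # A VALUES clause in a FROM position becomes an UNNEST of STRUCTs
    print(sqlglot.transpile("SELECT * FROM (VALUES (1, 'a')) AS t(x, y)", write="bigquery")[0])
    # Expected: SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y)]) AS t

    # A grouped expression that matches an aliased projection is replaced by its alias
    print(sqlglot.transpile("SELECT y + 1 AS z FROM x GROUP BY y + 1 ORDER BY z", write="bigquery")[0])
    # Expected: SELECT y + 1 AS z FROM x GROUP BY z ORDER BY z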
def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)
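For instance, `_pushdown_cte_column_names` is what lets a CTE with declared column names survive transpilation to BigQuery. A minimal sketch:

    import sqlglot

    print(sqlglot.transpile("WITH t(a, b) AS (SELECT 1, 2) SELECT * FROM t", write="bigquery")[0])
    # Expected: WITH t AS (SELECT 1 AS a, 2 AS b) SELECT * FROM t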
def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$', i.e. all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc. follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(this=expr_type(this=seq_get(args, 1)), format=seq_get(args, 0))

    return _builder
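`_build_extract_json_with_default_path` makes the single-argument JSON functions explicit about their path. A sketch of the intended behavior:

    import sqlglot

    print(sqlglot.transpile("SELECT JSON_VALUE(j)", read="bigquery", write="bigquery")[0])
    # Expected: SELECT JSON_VALUE(j, '$')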
def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql
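For the three functions in DQUOTES_ESCAPING_JSON_FUNCTIONS, `_json_extract_sql` temporarily disables bracket-style JSONPath keys, so quoted keys are expected to stay double-quoted. A hedged sketch:

    import sqlglot

    print(sqlglot.transpile("""SELECT JSON_VALUE(j, '$."my key"')""", read="bigquery", write="bigquery")[0])
    # Expected: the "my key" segment stays as $."my key" rather than becoming $['my key']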
class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True
    PRESERVE_ORIGINAL_NAMES = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")
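A sketch of the case-sensitivity heuristic in normalize_identifier (my_dataset.MyTable is a hypothetical name; the exact result can depend on how the table reference is parsed):

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    expr = sqlglot.parse_one("SELECT Col FROM my_dataset.MyTable", read="bigquery")
    print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))
    # Expected: the unqualified column is lowercased to `col`, while the
    # dataset-qualified table name is treated as case-sensitive and kept as-is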
    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())
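Entries in FUNCTIONS map BigQuery spellings onto canonical expression nodes so other dialects can re-render them. A sketch:

    import sqlglot

    # REGEXP_CONTAINS parses into exp.RegexpLike, which DuckDB spells REGEXP_MATCHES
    print(sqlglot.transpile("SELECT REGEXP_CONTAINS(x, 'a+')", read="bigquery", write="duckdb")[0])
    # Expected: SELECT REGEXP_MATCHES(x, 'a+')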
        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this
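A sketch of the dashed table name handling (my-project.my_dataset.my_table is a hypothetical reference):

    import sqlglot
    from sqlglot import exp

    # Dash-separated project names are folded into a single identifier
    table = sqlglot.parse_one(
        "SELECT * FROM my-project.my_dataset.my_table", read="bigquery"
    ).find(exp.Table)
    print(table.catalog)
    # Expected: my-project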
        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                table.set("this", exp.Identifier(this=info_schema_view, quoted=True))
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e. an array of structs) explodes the top-level
                # struct fields, in contrast to other dialects such as DuckDB, which flattens
                # only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest
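A sketch of the bracket handling: OFFSET/ORDINAL/SAFE_* accessors are captured as `offset`/`safe` args on exp.Bracket, and the generator's bracket_sql (below) re-emits them:

    import sqlglot

    print(sqlglot.transpile("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery", write="bigquery")[0])
    # Expected: SELECT arr[SAFE_OFFSET(0)] FROM t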
        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named
                # arguments that can appear in any order, e.g. MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g.
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr
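A sketch of how `_parse_make_interval` fills arguments, assuming exp.MakeInterval's arg_types begin with "year":

    import sqlglot
    from sqlglot import exp

    mi = sqlglot.parse_one(
        "SELECT MAKE_INTERVAL(1, minute => 5, day => 2)", read="bigquery"
    ).find(exp.MakeInterval)
    print(sorted(mi.args))
    # Expected: ['day', 'minute', 'year'] -- the positional 1 filled the first arg_types slot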
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func(
                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
            ),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: str_position_sql,
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }
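A sketch of TYPE_MAPPING in action when writing BigQuery:

    import sqlglot

    print(sqlglot.transpile("SELECT CAST(x AS BIGINT), CAST(y AS VARCHAR)", write="bigquery")[0])
    # Expected: SELECT CAST(x AS INT64), CAST(y AS STRING)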
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)
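A sketch of the quoted column-path handling (`dataset.table` is a hypothetical name):

    import sqlglot

    sql = "SELECT `dataset.table`.col FROM `dataset.table`"
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
    # Expected: the `dataset.table` prefix is kept as a single quoted identifier
    # on the round-trip (the column itself may be re-quoted)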
        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), time_expr.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
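A sketch of two of the overrides above, trycast_sql and eq_sql:

    import sqlglot

    # TRY_CAST from another dialect becomes SAFE_CAST
    print(sqlglot.transpile("SELECT TRY_CAST(x AS BIGINT)", read="duckdb", write="bigquery")[0])
    # Expected: SELECT SAFE_CAST(x AS INT64)

    # `= NULL` can never be true, so eq_sql collapses it to NULL outside of UPDATE
    print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])
    # Expected: SELECT NULL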
        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr)
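A final sketch: contains_sql undoes the LOWER wrappers that _build_contains_substring added at parse time, so a BigQuery-to-BigQuery round-trip is expected to be a no-op:

    import sqlglot

    print(sqlglot.transpile("SELECT CONTAINS_SUBSTR(a, 'b')", read="bigquery", write="bigquery")[0])
    # Expected: SELECT CONTAINS_SUBSTR(a, 'b')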
exp.Properties.Location.POST_SCHEMA, 998 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 999 } 1000 1001 # WINDOW comes after QUALIFY 1002 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1003 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1004 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1005 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1006 } 1007 1008 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1009 RESERVED_KEYWORDS = { 1010 "all", 1011 "and", 1012 "any", 1013 "array", 1014 "as", 1015 "asc", 1016 "assert_rows_modified", 1017 "at", 1018 "between", 1019 "by", 1020 "case", 1021 "cast", 1022 "collate", 1023 "contains", 1024 "create", 1025 "cross", 1026 "cube", 1027 "current", 1028 "default", 1029 "define", 1030 "desc", 1031 "distinct", 1032 "else", 1033 "end", 1034 "enum", 1035 "escape", 1036 "except", 1037 "exclude", 1038 "exists", 1039 "extract", 1040 "false", 1041 "fetch", 1042 "following", 1043 "for", 1044 "from", 1045 "full", 1046 "group", 1047 "grouping", 1048 "groups", 1049 "hash", 1050 "having", 1051 "if", 1052 "ignore", 1053 "in", 1054 "inner", 1055 "intersect", 1056 "interval", 1057 "into", 1058 "is", 1059 "join", 1060 "lateral", 1061 "left", 1062 "like", 1063 "limit", 1064 "lookup", 1065 "merge", 1066 "natural", 1067 "new", 1068 "no", 1069 "not", 1070 "null", 1071 "nulls", 1072 "of", 1073 "on", 1074 "or", 1075 "order", 1076 "outer", 1077 "over", 1078 "partition", 1079 "preceding", 1080 "proto", 1081 "qualify", 1082 "range", 1083 "recursive", 1084 "respect", 1085 "right", 1086 "rollup", 1087 "rows", 1088 "select", 1089 "set", 1090 "some", 1091 "struct", 1092 "tablesample", 1093 "then", 1094 "to", 1095 "treat", 1096 "true", 1097 "unbounded", 1098 "union", 1099 "unnest", 1100 "using", 1101 "when", 1102 "where", 1103 "window", 1104 "with", 1105 "within", 1106 } 1107 1108 def mod_sql(self, expression: exp.Mod) -> str: 1109 this = expression.this 1110 expr = expression.expression 1111 return self.func( 1112 "MOD", 1113 this.unnest() if isinstance(this, exp.Paren) else this, 1114 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1115 ) 1116 1117 def column_parts(self, expression: exp.Column) -> str: 1118 if expression.meta.get("quoted_column"): 1119 # If a column reference is of the form `dataset.table`.name, we need 1120 # to preserve the quoted table path, otherwise the reference breaks 1121 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1122 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1123 return f"{table_path}.{self.sql(expression, 'this')}" 1124 1125 return super().column_parts(expression) 1126 1127 def table_parts(self, expression: exp.Table) -> str: 1128 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1129 # we need to make sure the correct quoting is used in each case. 
1130 # 1131 # For example, if there is a CTE x that clashes with a schema name, then the former will 1132 # return the table y in that schema, whereas the latter will return the CTE's y column: 1133 # 1134 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1135 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1136 if expression.meta.get("quoted_table"): 1137 table_parts = ".".join(p.name for p in expression.parts) 1138 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1139 1140 return super().table_parts(expression) 1141 1142 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1143 this = expression.this 1144 if isinstance(this, exp.TsOrDsToDatetime): 1145 func_name = "FORMAT_DATETIME" 1146 elif isinstance(this, exp.TsOrDsToTimestamp): 1147 func_name = "FORMAT_TIMESTAMP" 1148 else: 1149 func_name = "FORMAT_DATE" 1150 1151 time_expr = ( 1152 this 1153 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1154 else expression 1155 ) 1156 return self.func(func_name, self.format_time(expression), time_expr.this) 1157 1158 def eq_sql(self, expression: exp.EQ) -> str: 1159 # Operands of = cannot be NULL in BigQuery 1160 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1161 if not isinstance(expression.parent, exp.Update): 1162 return "NULL" 1163 1164 return self.binary(expression, "=") 1165 1166 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1167 parent = expression.parent 1168 1169 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1170 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1171 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1172 return self.func( 1173 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1174 ) 1175 1176 return super().attimezone_sql(expression) 1177 1178 def trycast_sql(self, expression: exp.TryCast) -> str: 1179 return self.cast_sql(expression, safe_prefix="SAFE_") 1180 1181 def bracket_sql(self, expression: exp.Bracket) -> str: 1182 this = expression.this 1183 expressions = expression.expressions 1184 1185 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1186 arg = expressions[0] 1187 if arg.type is None: 1188 from sqlglot.optimizer.annotate_types import annotate_types 1189 1190 arg = annotate_types(arg) 1191 1192 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1193 # BQ doesn't support bracket syntax with string values for structs 1194 return f"{self.sql(this)}.{arg.name}" 1195 1196 expressions_sql = self.expressions(expression, flat=True) 1197 offset = expression.args.get("offset") 1198 1199 if offset == 0: 1200 expressions_sql = f"OFFSET({expressions_sql})" 1201 elif offset == 1: 1202 expressions_sql = f"ORDINAL({expressions_sql})" 1203 elif offset is not None: 1204 self.unsupported(f"Unsupported array offset: {offset}") 1205 1206 if expression.args.get("safe"): 1207 expressions_sql = f"SAFE_{expressions_sql}" 1208 1209 return f"{self.sql(this)}[{expressions_sql}]" 1210 1211 def in_unnest_op(self, expression: exp.Unnest) -> str: 1212 return self.sql(expression) 1213 1214 def version_sql(self, expression: exp.Version) -> str: 1215 if expression.name == "TIMESTAMP": 1216 expression.set("this", "SYSTEM_TIME") 1217 return super().version_sql(expression) 1218 1219 def contains_sql(self, expression: exp.Contains) -> str: 1220 this = expression.this 1221 expr 
= expression.expression 1222 1223 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1224 this = this.this 1225 expr = expr.this 1226 1227 return self.func("CONTAINS_SUBSTR", this, expr)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
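A minimal sketch of reading this setting off the dialect, assuming it is exposed as the WEEK_OFFSET class attribute:

from sqlglot.dialects.bigquery import BigQuery

# BigQuery weeks start on Sunday, so the dialect would be expected to report -1
# here (an assumption based on the docstring above, not a verified value).
print(BigQuery.WEEK_OFFSET)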
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
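As a hedged illustration: in BigQuery, LOG(X, Y) takes the base as the second argument, so parsing through the dialect should keep that argument order.

import sqlglot

# Parse BigQuery's two-argument LOG (value first, base second) and print the AST;
# a single-argument LOG defaults to the natural log (see LOG_DEFAULTS_TO_LN in the
# Parser below).
print(repr(sqlglot.parse_one("SELECT LOG(100, 10)", read="bigquery")))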
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
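A small sketch of the BigQuery behavior described above, using the optimizer's qualify pass (the expected output in the comment is an assumption based on the rules above, not a verified result):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)

# Under BigQuery's rules, my_id in GROUP BY/HAVING should expand to the aliased
# expression (id), while the WHERE clause keeps referring to data.my_id.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))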
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
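For instance (a hedged sketch; per the Parser's FUNCTIONS mapping below, both spellings build the same node):

import sqlglot

# JSON_VALUE and JSON_EXTRACT_SCALAR both parse to exp.JSONExtractScalar, so
# preserving the original name is what lets each spelling round-trip instead of
# being rewritten to the other.
e = sqlglot.parse_one("SELECT JSON_VALUE(data, '$.x')", read="bigquery")
print(e.sql("bigquery"))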
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
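A hedged example of TIME_MAPPING at work: transpiling a BigQuery format string to another dialect (duckdb is just an arbitrary target here).

import sqlglot

# The '%Y-%m-%d' tokens are translated through the dialects' TIME_MAPPINGs.
print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%Y-%m-%d', s)", read="bigquery", write="duckdb")[0])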
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
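A minimal sketch, assuming the attribute is exposed as PSEUDOCOLUMNS (BigQuery's ingestion-time partitioning columns would be the typical members):

from sqlglot.dialects.bigquery import BigQuery

# Engine-generated columns such as _PARTITIONTIME/_PARTITIONDATE are an
# assumption here; print the actual set to verify.
print(BigQuery.PSEUDOCOLUMNS)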
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
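A short sketch of the effect: when transpiling into BigQuery, a bare UNION should come out with an explicit set quantifier.

import sqlglot

# BigQuery requires DISTINCT or ALL on set operations, so this is expected to
# render as "SELECT 1 UNION DISTINCT SELECT 2" (expected output, not verified).
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="bigquery")[0])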
423 def normalize_identifier(self, expression: E) -> E: 424 if ( 425 isinstance(expression, exp.Identifier) 426 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 427 ): 428 parent = expression.parent 429 while isinstance(parent, exp.Dot): 430 parent = parent.parent 431 432 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 433 # by default. The following check uses a heuristic to detect tables based on whether 434 # they are qualified. This should generally be correct, because tables in BigQuery 435 # must be qualified with at least a dataset, unless @@dataset_id is set. 436 case_sensitive = ( 437 isinstance(parent, exp.UserDefinedFunction) 438 or ( 439 isinstance(parent, exp.Table) 440 and parent.db 441 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 442 ) 443 or expression.meta.get("is_table") 444 ) 445 if not case_sensitive: 446 expression.set("this", expression.this.lower()) 447 448 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
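A compact sketch of the behavior described above (expected results per the docstring; BigQuery's own result depends on the heuristics in normalize_identifier):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

# Postgres should lowercase the unquoted identifier, Snowflake should uppercase it.
for name in ("postgres", "snowflake", "bigquery"):
    dialect = Dialect.get_or_raise(name)
    print(name, dialect.normalize_identifier(exp.to_identifier("FoO")).name)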
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
450 class Tokenizer(tokens.Tokenizer): 451 QUOTES = ["'", '"', '"""', "'''"] 452 COMMENTS = ["--", "#", ("/*", "*/")] 453 IDENTIFIERS = ["`"] 454 STRING_ESCAPES = ["\\"] 455 456 HEX_STRINGS = [("0x", ""), ("0X", "")] 457 458 BYTE_STRINGS = [ 459 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 460 ] 461 462 RAW_STRINGS = [ 463 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 464 ] 465 466 KEYWORDS = { 467 **tokens.Tokenizer.KEYWORDS, 468 "ANY TYPE": TokenType.VARIANT, 469 "BEGIN": TokenType.COMMAND, 470 "BEGIN TRANSACTION": TokenType.BEGIN, 471 "BYTEINT": TokenType.INT, 472 "BYTES": TokenType.BINARY, 473 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 474 "DATETIME": TokenType.TIMESTAMP, 475 "DECLARE": TokenType.COMMAND, 476 "ELSEIF": TokenType.COMMAND, 477 "EXCEPTION": TokenType.COMMAND, 478 "FLOAT64": TokenType.DOUBLE, 479 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 480 "MODEL": TokenType.MODEL, 481 "NOT DETERMINISTIC": TokenType.VOLATILE, 482 "RECORD": TokenType.STRUCT, 483 "TIMESTAMP": TokenType.TIMESTAMPTZ, 484 } 485 KEYWORDS.pop("DIV") 486 KEYWORDS.pop("VALUES") 487 KEYWORDS.pop("/*+")
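A quick tokenization sketch showing the BigQuery-specific literals configured above (raw strings, byte strings, and 0x hex literals):

import sqlglot

# Each token's type and text; the r'...'/b'...' prefixes and 0x literals come
# from the RAW_STRINGS, BYTE_STRINGS and HEX_STRINGS settings above.
for token in sqlglot.tokenize(r"SELECT r'a\d+', b'bytes', 0xFF", read="bigquery"):
    print(token.token_type, token.text)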
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
489 class Parser(parser.Parser): 490 PREFIXED_PIVOT_COLUMNS = True 491 LOG_DEFAULTS_TO_LN = True 492 SUPPORTS_IMPLICIT_UNNEST = True 493 494 FUNCTIONS = { 495 **parser.Parser.FUNCTIONS, 496 "CONTAINS_SUBSTR": _build_contains_substring, 497 "DATE": _build_date, 498 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 499 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 500 "DATE_TRUNC": lambda args: exp.DateTrunc( 501 unit=exp.Literal.string(str(seq_get(args, 1))), 502 this=seq_get(args, 0), 503 zone=seq_get(args, 2), 504 ), 505 "DATETIME": _build_datetime, 506 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 507 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 508 "DIV": binary_from_function(exp.IntDiv), 509 "EDIT_DISTANCE": _build_levenshtein, 510 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 511 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 512 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 513 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 514 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 515 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 516 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 517 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 518 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 519 "MD5": exp.MD5Digest.from_arg_list, 520 "TO_HEX": _build_to_hex, 521 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 522 [seq_get(args, 1), seq_get(args, 0)] 523 ), 524 "PARSE_TIMESTAMP": _build_parse_timestamp, 525 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 526 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 527 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 528 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 529 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 530 ), 531 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 532 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 533 "SPLIT": lambda args: exp.Split( 534 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 535 this=seq_get(args, 0), 536 expression=seq_get(args, 1) or exp.Literal.string(","), 537 ), 538 "STRPOS": exp.StrPosition.from_arg_list, 539 "TIME": _build_time, 540 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 541 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 542 "TIMESTAMP": _build_timestamp, 543 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 544 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 545 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 546 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 547 ), 548 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 549 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 550 ), 551 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 552 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 553 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 554 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 555 } 556 557 FUNCTION_PARSERS = { 558 **parser.Parser.FUNCTION_PARSERS, 559 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 560 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 561 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 562 } 563 FUNCTION_PARSERS.pop("TRIM") 564 565 NO_PAREN_FUNCTIONS = { 566 **parser.Parser.NO_PAREN_FUNCTIONS, 567 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 568 } 569 570 NESTED_TYPE_TOKENS = { 571 *parser.Parser.NESTED_TYPE_TOKENS, 572 TokenType.TABLE, 573 } 574 575 PROPERTY_PARSERS = { 576 **parser.Parser.PROPERTY_PARSERS, 577 "NOT DETERMINISTIC": lambda self: self.expression( 578 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 579 ), 580 "OPTIONS": lambda self: self._parse_with_property(), 581 } 582 583 CONSTRAINT_PARSERS = { 584 **parser.Parser.CONSTRAINT_PARSERS, 585 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 586 } 587 588 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 589 RANGE_PARSERS.pop(TokenType.OVERLAPS) 590 591 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 592 593 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 594 595 STATEMENT_PARSERS = { 596 **parser.Parser.STATEMENT_PARSERS, 597 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 598 TokenType.END: lambda self: self._parse_as_command(self._prev), 599 TokenType.FOR: lambda self: self._parse_for_in(), 600 } 601 602 BRACKET_OFFSETS = { 603 "OFFSET": (0, False), 604 "ORDINAL": (1, False), 605 "SAFE_OFFSET": (0, True), 606 "SAFE_ORDINAL": (1, True), 607 } 608 609 def _parse_for_in(self) -> exp.ForIn: 610 this = self._parse_range() 611 self._match_text_seq("DO") 612 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 613 614 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 615 this = super()._parse_table_part(schema=schema) or self._parse_number() 616 617 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 618 if isinstance(this, exp.Identifier): 619 table_name = this.name 620 while self._match(TokenType.DASH, advance=False) and self._next: 621 start = self._curr 622 while self._is_connected() and not self._match_set( 623 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 624 ): 625 self._advance() 626 627 table_name += self._find_sql(start, self._prev) 628 629 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 630 elif isinstance(this, exp.Literal): 631 table_name = this.name 632 633 if self._is_connected() and self._parse_var(any_token=True): 634 table_name += self._prev.text 635 636 this = exp.Identifier(this=table_name, quoted=True) 637 638 return this 639 640 def _parse_table_parts( 641 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 642 ) -> exp.Table: 643 table = super()._parse_table_parts( 644 schema=schema, is_db_reference=is_db_reference, wildcard=True 645 ) 646 647 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 648 if not table.catalog: 649 if table.db: 650 parts = table.db.split(".") 651 if len(parts) == 2 and not table.args["db"].quoted: 652 table.set("catalog", exp.Identifier(this=parts[0])) 653 table.set("db", exp.Identifier(this=parts[1])) 654 else: 655 parts = table.name.split(".") 656 if len(parts) == 2 and not table.this.quoted: 657 table.set("db", exp.Identifier(this=parts[0])) 658 table.set("this", exp.Identifier(this=parts[1])) 659 660 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 661 alias = table.this 662 catalog, db, this, *rest = ( 663 exp.to_identifier(p, quoted=True) 664 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 665 ) 666 667 if rest and this: 668 this = exp.Dot.build([this, *rest]) # type: ignore 669 670 table = exp.Table( 671 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 672 ) 673 table.meta["quoted_table"] = True 674 else: 675 alias = None 676 677 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 678 # dataset, so if the project identifier is omitted we need to fix the ast so that 679 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 680 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 681 # views, because it would seem like the "catalog" part is set, when it'd actually 682 # be the region/dataset. Merging the two identifiers into a single one is done to 683 # avoid producing a 4-part Table reference, which would cause issues in the schema 684 # module, when there are 3-part table names mixed with information schema views. 685 # 686 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 687 table_parts = table.parts 688 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 689 # We need to alias the table here to avoid breaking existing qualified columns. 690 # This is expected to be safe, because if there's an actual alias coming up in 691 # the token stream, it will overwrite this one. If there isn't one, we are only 692 # exposing the name that can be used to reference the view explicitly (a no-op). 693 exp.alias_( 694 table, 695 t.cast(exp.Identifier, alias or table_parts[-1]), 696 table=True, 697 copy=False, 698 ) 699 700 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 701 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 702 table.set("db", seq_get(table_parts, -3)) 703 table.set("catalog", seq_get(table_parts, -4)) 704 705 return table 706 707 def _parse_column(self) -> t.Optional[exp.Expression]: 708 column = super()._parse_column() 709 if isinstance(column, exp.Column): 710 parts = column.parts 711 if any("." in p.name for p in parts): 712 catalog, db, table, this, *rest = ( 713 exp.to_identifier(p, quoted=True) 714 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 715 ) 716 717 if rest and this: 718 this = exp.Dot.build([this, *rest]) # type: ignore 719 720 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 721 column.meta["quoted_column"] = True 722 723 return column 724 725 @t.overload 726 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 727 728 @t.overload 729 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
730 731 def _parse_json_object(self, agg=False): 732 json_object = super()._parse_json_object() 733 array_kv_pair = seq_get(json_object.expressions, 0) 734 735 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 736 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 737 if ( 738 array_kv_pair 739 and isinstance(array_kv_pair.this, exp.Array) 740 and isinstance(array_kv_pair.expression, exp.Array) 741 ): 742 keys = array_kv_pair.this.expressions 743 values = array_kv_pair.expression.expressions 744 745 json_object.set( 746 "expressions", 747 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 748 ) 749 750 return json_object 751 752 def _parse_bracket( 753 self, this: t.Optional[exp.Expression] = None 754 ) -> t.Optional[exp.Expression]: 755 bracket = super()._parse_bracket(this) 756 757 if this is bracket: 758 return bracket 759 760 if isinstance(bracket, exp.Bracket): 761 for expression in bracket.expressions: 762 name = expression.name.upper() 763 764 if name not in self.BRACKET_OFFSETS: 765 break 766 767 offset, safe = self.BRACKET_OFFSETS[name] 768 bracket.set("offset", offset) 769 bracket.set("safe", safe) 770 expression.replace(expression.expressions[0]) 771 772 return bracket 773 774 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 775 unnest = super()._parse_unnest(with_alias=with_alias) 776 777 if not unnest: 778 return None 779 780 unnest_expr = seq_get(unnest.expressions, 0) 781 if unnest_expr: 782 from sqlglot.optimizer.annotate_types import annotate_types 783 784 unnest_expr = annotate_types(unnest_expr) 785 786 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 787 # in contrast to other dialects such as DuckDB which flattens only the array by default 788 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 789 array_elem.is_type(exp.DataType.Type.STRUCT) 790 for array_elem in unnest_expr._type.expressions 791 ): 792 unnest.set("explode_array", True) 793 794 return unnest 795 796 def _parse_make_interval(self) -> exp.MakeInterval: 797 expr = exp.MakeInterval() 798 799 for arg_key in expr.arg_types: 800 value = self._parse_lambda() 801 802 if not value: 803 break 804 805 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 806 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 807 if isinstance(value, exp.Kwarg): 808 arg_key = value.this.name 809 810 expr.set(arg_key, value) 811 812 self._match(TokenType.COMMA) 813 814 return expr 815 816 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 817 expr = self.expression( 818 exp.FeaturesAtTime, 819 this=(self._match(TokenType.TABLE) and self._parse_table()) 820 or self._parse_select(nested=True), 821 ) 822 823 while self._match(TokenType.COMMA): 824 arg = self._parse_lambda() 825 826 # Get the LHS of the Kwarg and set the arg to that value, e.g 827 # "num_rows => 1" sets the expr's `num_rows` arg 828 if arg: 829 expr.set(arg.this.name, arg) 830 831 return expr
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
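For example (a hedged sketch using the top-level helpers, which forward these options to the parser):

import sqlglot
from sqlglot.errors import ErrorLevel

# Parse with BigQuery's parser, raising instead of collecting errors;
# TIMESTAMP_MICROS is one of the dialect-specific functions registered above.
ast = sqlglot.parse_one(
    "SELECT TIMESTAMP_MICROS(1672531200000000)",
    read="bigquery",
    error_level=ErrorLevel.RAISE,
)
print(ast.sql("duckdb"))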
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
833 class Generator(generator.Generator): 834 INTERVAL_ALLOWS_PLURAL_FORM = False 835 JOIN_HINTS = False 836 QUERY_HINTS = False 837 TABLE_HINTS = False 838 LIMIT_FETCH = "LIMIT" 839 RENAME_TABLE_WITH_DB = False 840 NVL2_SUPPORTED = False 841 UNNEST_WITH_ORDINALITY = False 842 COLLATE_IS_FUNC = True 843 LIMIT_ONLY_LITERALS = True 844 SUPPORTS_TABLE_ALIAS_COLUMNS = False 845 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 846 JSON_KEY_VALUE_PAIR_SEP = "," 847 NULL_ORDERING_SUPPORTED = False 848 IGNORE_NULLS_IN_FUNC = True 849 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 850 CAN_IMPLEMENT_ARRAY_ANY = True 851 SUPPORTS_TO_NUMBER = False 852 NAMED_PLACEHOLDER_TOKEN = "@" 853 HEX_FUNC = "TO_HEX" 854 WITH_PROPERTIES_PREFIX = "OPTIONS" 855 SUPPORTS_EXPLODING_PROJECTIONS = False 856 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 857 SUPPORTS_UNIX_SECONDS = True 858 859 TRANSFORMS = { 860 **generator.Generator.TRANSFORMS, 861 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 862 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 863 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 864 exp.Array: inline_array_unless_query, 865 exp.ArrayContains: _array_contains_sql, 866 exp.ArrayFilter: filter_array_using_unnest, 867 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 868 exp.CollateProperty: lambda self, e: ( 869 f"DEFAULT COLLATE {self.sql(e, 'this')}" 870 if e.args.get("default") 871 else f"COLLATE {self.sql(e, 'this')}" 872 ), 873 exp.Commit: lambda *_: "COMMIT TRANSACTION", 874 exp.CountIf: rename_func("COUNTIF"), 875 exp.Create: _create_sql, 876 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 877 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 878 exp.DateDiff: lambda self, e: self.func( 879 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 880 ), 881 exp.DateFromParts: rename_func("DATE"), 882 exp.DateStrToDate: datestrtodate_sql, 883 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 884 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 885 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 886 exp.DateTrunc: lambda self, e: self.func( 887 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 888 ), 889 exp.FromTimeZone: lambda self, e: self.func( 890 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 891 ), 892 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 893 exp.GroupConcat: rename_func("STRING_AGG"), 894 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 895 exp.If: if_sql(false_value="NULL"), 896 exp.ILike: no_ilike_sql, 897 exp.IntDiv: rename_func("DIV"), 898 exp.Int64: rename_func("INT64"), 899 exp.JSONExtract: _json_extract_sql, 900 exp.JSONExtractArray: _json_extract_sql, 901 exp.JSONExtractScalar: _json_extract_sql, 902 exp.JSONFormat: rename_func("TO_JSON_STRING"), 903 exp.Levenshtein: _levenshtein_sql, 904 exp.Max: max_or_greatest, 905 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 906 exp.MD5Digest: rename_func("MD5"), 907 exp.Min: min_or_least, 908 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 909 exp.RegexpExtract: lambda self, e: self.func( 910 "REGEXP_EXTRACT", 911 e.this, 912 e.expression, 913 e.args.get("position"), 914 e.args.get("occurrence"), 915 ), 916 exp.RegexpExtractAll: lambda self, e: self.func( 917 "REGEXP_EXTRACT_ALL", e.this, e.expression 918 ), 919 exp.RegexpReplace: regexp_replace_sql, 920 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 921 exp.ReturnsProperty: _returnsproperty_sql, 922 
exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 923 exp.Select: transforms.preprocess( 924 [ 925 transforms.explode_to_unnest(), 926 transforms.unqualify_unnest, 927 transforms.eliminate_distinct_on, 928 _alias_ordered_group, 929 transforms.eliminate_semi_and_anti_joins, 930 ] 931 ), 932 exp.SHA: rename_func("SHA1"), 933 exp.SHA2: sha256_sql, 934 exp.StabilityProperty: lambda self, e: ( 935 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 936 ), 937 exp.String: rename_func("STRING"), 938 exp.StrPosition: str_position_sql, 939 exp.StrToDate: _str_to_datetime_sql, 940 exp.StrToTime: _str_to_datetime_sql, 941 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 942 exp.TimeFromParts: rename_func("TIME"), 943 exp.TimestampFromParts: rename_func("DATETIME"), 944 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 945 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 946 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 947 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 948 exp.TimeStrToTime: timestrtotime_sql, 949 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 950 exp.TsOrDsAdd: _ts_or_ds_add_sql, 951 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 952 exp.TsOrDsToTime: rename_func("TIME"), 953 exp.TsOrDsToDatetime: rename_func("DATETIME"), 954 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 955 exp.Unhex: rename_func("FROM_HEX"), 956 exp.UnixDate: rename_func("UNIX_DATE"), 957 exp.UnixToTime: _unix_to_time_sql, 958 exp.Uuid: lambda *_: "GENERATE_UUID()", 959 exp.Values: _derived_table_values_to_unnest, 960 exp.VariancePop: rename_func("VAR_POP"), 961 } 962 963 SUPPORTED_JSON_PATH_PARTS = { 964 exp.JSONPathKey, 965 exp.JSONPathRoot, 966 exp.JSONPathSubscript, 967 } 968 969 TYPE_MAPPING = { 970 **generator.Generator.TYPE_MAPPING, 971 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 972 exp.DataType.Type.BIGINT: "INT64", 973 exp.DataType.Type.BINARY: "BYTES", 974 exp.DataType.Type.BOOLEAN: "BOOL", 975 exp.DataType.Type.CHAR: "STRING", 976 exp.DataType.Type.DECIMAL: "NUMERIC", 977 exp.DataType.Type.DOUBLE: "FLOAT64", 978 exp.DataType.Type.FLOAT: "FLOAT64", 979 exp.DataType.Type.INT: "INT64", 980 exp.DataType.Type.NCHAR: "STRING", 981 exp.DataType.Type.NVARCHAR: "STRING", 982 exp.DataType.Type.SMALLINT: "INT64", 983 exp.DataType.Type.TEXT: "STRING", 984 exp.DataType.Type.TIMESTAMP: "DATETIME", 985 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 986 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 987 exp.DataType.Type.TINYINT: "INT64", 988 exp.DataType.Type.ROWVERSION: "BYTES", 989 exp.DataType.Type.UUID: "STRING", 990 exp.DataType.Type.VARBINARY: "BYTES", 991 exp.DataType.Type.VARCHAR: "STRING", 992 exp.DataType.Type.VARIANT: "ANY TYPE", 993 } 994 995 PROPERTIES_LOCATION = { 996 **generator.Generator.PROPERTIES_LOCATION, 997 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 998 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 999 } 1000 1001 # WINDOW comes after QUALIFY 1002 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1003 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1004 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1005 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1006 } 1007 1008 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1009 RESERVED_KEYWORDS = { 1010 "all", 1011 "and", 1012 "any", 1013 "array", 1014 "as", 1015 "asc", 1016 "assert_rows_modified", 1017 "at", 1018 "between", 1019 "by", 1020 "case", 1021 
"cast", 1022 "collate", 1023 "contains", 1024 "create", 1025 "cross", 1026 "cube", 1027 "current", 1028 "default", 1029 "define", 1030 "desc", 1031 "distinct", 1032 "else", 1033 "end", 1034 "enum", 1035 "escape", 1036 "except", 1037 "exclude", 1038 "exists", 1039 "extract", 1040 "false", 1041 "fetch", 1042 "following", 1043 "for", 1044 "from", 1045 "full", 1046 "group", 1047 "grouping", 1048 "groups", 1049 "hash", 1050 "having", 1051 "if", 1052 "ignore", 1053 "in", 1054 "inner", 1055 "intersect", 1056 "interval", 1057 "into", 1058 "is", 1059 "join", 1060 "lateral", 1061 "left", 1062 "like", 1063 "limit", 1064 "lookup", 1065 "merge", 1066 "natural", 1067 "new", 1068 "no", 1069 "not", 1070 "null", 1071 "nulls", 1072 "of", 1073 "on", 1074 "or", 1075 "order", 1076 "outer", 1077 "over", 1078 "partition", 1079 "preceding", 1080 "proto", 1081 "qualify", 1082 "range", 1083 "recursive", 1084 "respect", 1085 "right", 1086 "rollup", 1087 "rows", 1088 "select", 1089 "set", 1090 "some", 1091 "struct", 1092 "tablesample", 1093 "then", 1094 "to", 1095 "treat", 1096 "true", 1097 "unbounded", 1098 "union", 1099 "unnest", 1100 "using", 1101 "when", 1102 "where", 1103 "window", 1104 "with", 1105 "within", 1106 } 1107 1108 def mod_sql(self, expression: exp.Mod) -> str: 1109 this = expression.this 1110 expr = expression.expression 1111 return self.func( 1112 "MOD", 1113 this.unnest() if isinstance(this, exp.Paren) else this, 1114 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1115 ) 1116 1117 def column_parts(self, expression: exp.Column) -> str: 1118 if expression.meta.get("quoted_column"): 1119 # If a column reference is of the form `dataset.table`.name, we need 1120 # to preserve the quoted table path, otherwise the reference breaks 1121 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1122 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1123 return f"{table_path}.{self.sql(expression, 'this')}" 1124 1125 return super().column_parts(expression) 1126 1127 def table_parts(self, expression: exp.Table) -> str: 1128 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1129 # we need to make sure the correct quoting is used in each case. 
1130 # 1131 # For example, if there is a CTE x that clashes with a schema name, then the former will 1132 # return the table y in that schema, whereas the latter will return the CTE's y column: 1133 # 1134 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1135 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1136 if expression.meta.get("quoted_table"): 1137 table_parts = ".".join(p.name for p in expression.parts) 1138 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1139 1140 return super().table_parts(expression) 1141 1142 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1143 this = expression.this 1144 if isinstance(this, exp.TsOrDsToDatetime): 1145 func_name = "FORMAT_DATETIME" 1146 elif isinstance(this, exp.TsOrDsToTimestamp): 1147 func_name = "FORMAT_TIMESTAMP" 1148 else: 1149 func_name = "FORMAT_DATE" 1150 1151 time_expr = ( 1152 this 1153 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1154 else expression 1155 ) 1156 return self.func(func_name, self.format_time(expression), time_expr.this) 1157 1158 def eq_sql(self, expression: exp.EQ) -> str: 1159 # Operands of = cannot be NULL in BigQuery 1160 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1161 if not isinstance(expression.parent, exp.Update): 1162 return "NULL" 1163 1164 return self.binary(expression, "=") 1165 1166 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1167 parent = expression.parent 1168 1169 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1170 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1171 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1172 return self.func( 1173 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1174 ) 1175 1176 return super().attimezone_sql(expression) 1177 1178 def trycast_sql(self, expression: exp.TryCast) -> str: 1179 return self.cast_sql(expression, safe_prefix="SAFE_") 1180 1181 def bracket_sql(self, expression: exp.Bracket) -> str: 1182 this = expression.this 1183 expressions = expression.expressions 1184 1185 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1186 arg = expressions[0] 1187 if arg.type is None: 1188 from sqlglot.optimizer.annotate_types import annotate_types 1189 1190 arg = annotate_types(arg) 1191 1192 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1193 # BQ doesn't support bracket syntax with string values for structs 1194 return f"{self.sql(this)}.{arg.name}" 1195 1196 expressions_sql = self.expressions(expression, flat=True) 1197 offset = expression.args.get("offset") 1198 1199 if offset == 0: 1200 expressions_sql = f"OFFSET({expressions_sql})" 1201 elif offset == 1: 1202 expressions_sql = f"ORDINAL({expressions_sql})" 1203 elif offset is not None: 1204 self.unsupported(f"Unsupported array offset: {offset}") 1205 1206 if expression.args.get("safe"): 1207 expressions_sql = f"SAFE_{expressions_sql}" 1208 1209 return f"{self.sql(this)}[{expressions_sql}]" 1210 1211 def in_unnest_op(self, expression: exp.Unnest) -> str: 1212 return self.sql(expression) 1213 1214 def version_sql(self, expression: exp.Version) -> str: 1215 if expression.name == "TIMESTAMP": 1216 expression.set("this", "SYSTEM_TIME") 1217 return super().version_sql(expression) 1218 1219 def contains_sql(self, expression: exp.Contains) -> str: 1220 this = expression.this 1221 expr 
= expression.expression 1222 1223 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1224 this = this.this 1225 expr = expr.this 1226 1227 return self.func("CONTAINS_SUBSTR", this, expr)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
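For example (a hedged sketch; the options above are plain keyword arguments on the top-level helpers):

import sqlglot

# pretty and identify are forwarded to this Generator.
print(sqlglot.transpile("SELECT a AS b FROM t WHERE x = 1", write="bigquery", pretty=True, identify=True)[0])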
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql