# sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

# TypeAnnotator is needed at runtime: BigQuery.COERCES_TO is built from
# TypeAnnotator.COERCES_TO at class-definition time.
from sqlglot.optimizer.annotate_types import TypeAnnotator

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
    sha2_digest_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.typing.bigquery import EXPRESSION_METADATA

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

logger = logging.getLogger("sqlglot")


# Union of the JSON extraction nodes that share BigQuery-specific SQL generation.
JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

# JSON functions whose path keys are escaped with double quotes rather than brackets.
DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")

# Positional argument order for MAKE_INTERVAL(...).
MAKE_INTERVAL_KWARGS = ["year", "month", "day", "hour", "minute", "second"]


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    """Render a derived-table VALUES clause as UNNEST([STRUCT(...), ...]).

    Only applies when the VALUES node appears under a FROM/JOIN; otherwise the
    default VALUES generation is used.
    """
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        # Use the alias' column names when present, otherwise synthesize _c0, _c1, ...
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    """Render a RETURNS clause; schema returns use BigQuery's `name <cols>` form."""
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    """Render CREATE, promoting table-valued functions to CREATE TABLE FUNCTION."""
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        # Unwrap the function body so it isn't rendered as a parenthesized
        # subquery / quoted literal.
        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    """Replace grouped expressions with their projection aliases when ORDER BY exists."""
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                # Ordinal group keys (GROUP BY 1) are left untouched
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    """Build StrToTime from PARSE_TIMESTAMP(format, value[, zone])."""
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    """Build Timestamp, flagged as timezone-aware (BigQuery TIMESTAMP has a TZ)."""
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    """DATE(y, m, d) becomes DateFromParts; other arities stay a plain Date."""
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _build_json_strip_nulls(args: t.List) -> exp.JSONStripNulls:
    """Build JSONStripNulls, routing named kwargs onto the node by name."""
    expression = exp.JSONStripNulls(this=seq_get(args, 0))

    for arg in args[1:]:
        if isinstance(arg, exp.Kwarg):
            expression.set(arg.this.name.lower(), arg)
        else:
            expression.set("expression", arg)

    return expression
def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    """Render ARRAY "contains" as EXISTS(SELECT 1 FROM UNNEST(arr) _unnest(_col) WHERE _col = value)."""
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD after casting the operand appropriately."""
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF, casting both operands to TIMESTAMP in place."""
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime via TIMESTAMP_SECONDS/MILLIS/MICROS depending on the scale.

    Other scales are normalized to seconds with a POW(10, scale) division first.
    """
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    """TIME(x) -> TsOrDsToTime, TIME(x, y) -> Time, TIME(h, m, s) -> TimeFromParts."""
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    """DATETIME(x) -> TsOrDsToDatetime, DATETIME(x, y) -> Datetime, more args -> TimestampFromParts."""
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def build_date_diff(args: t.List) -> exp.Expression:
    """Build DateDiff with BigQuery's date-part boundary counting semantics."""
    expr = exp.DateDiff(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        unit=seq_get(args, 2),
        date_part_boundary=True,
    )

    # Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST to facilitate transpilation
    # This is done post exp.DateDiff construction since the TimeUnit mixin performs canonicalizations in its constructor too
    unit = expr.args.get("unit")

    if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
        expr.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))

    return expr


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List, BigQuery], E]:
    """Return a builder for REGEXP_EXTRACT-style functions.

    If the pattern literal compiles to a regex with exactly one capturing group,
    the group argument is pinned to 1; otherwise `default_group` is used.
    """
    def _builder(args: t.List, dialect: BigQuery) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            # Non-literal or invalid pattern: fall back to the default group
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
            **(
                {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
                if expr_type is exp.RegexpExtract
                else {}
            ),
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a JSON-extraction builder that defaults a missing path argument to '$'."""
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    """Render StrToDate/StrToTime as PARSE_DATE/PARSE_TIMESTAMP, or SAFE_CAST when `safe` is set."""
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        # SAFE_CAST uses BigQuery FORMAT elements, so the time format must be
        # mapped back through the inverse format mapping
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    """Render Levenshtein as EDIT_DISTANCE, passing max_dist as the max_distance kwarg."""
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    """Build Levenshtein from EDIT_DISTANCE(a, b[, max_distance => n])."""
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    """Return a builder mapping FORMAT_DATE/DATETIME/TIMESTAMP/TIME(fmt, value[, zone]) to TimeToStr."""
    def _builder(args: t.List) -> exp.TimeToStr:
        formatted_time = build_formatted_time(exp.TimeToStr, "bigquery")(
            [expr_type(this=seq_get(args, 1)), seq_get(args, 0)]
        )
        formatted_time.set("zone", seq_get(args, 2))
        return formatted_time

    return _builder


def _build_contains_substring(args: t.List) -> exp.Contains:
    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr, json_scope=seq_get(args, 2))


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    """Render a JSON extraction, toggling double-quote path escaping for the
    functions in DQUOTES_ESCAPING_JSON_FUNCTIONS while generating."""
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    # Restore the generator's default bracket-quoting behavior
    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


class BigQuery(Dialect):
    """The GoogleSQL (BigQuery) dialect: tokenizer, parser and generator settings."""

    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ONLY_GROUP_ALIAS_REF = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True
    BYTE_STRING_IS_BYTES_TYPE = True
    UUID_IS_STRING_TYPE = True
    ANNOTATE_ALL_SCOPES = True
    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
    TABLES_REFERENCEABLE_AS_COLUMNS = True
    SUPPORTS_STRUCT_STAR_EXPANSION = True
    EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
    QUERY_RESULTS_ARE_STRUCTS = True
    JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
    LEAST_GREATEST_IGNORES_NULLS = False
    DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
    PRIORITIZE_NON_LITERAL_TYPES = True
    ALIAS_POST_VERSION = False

    # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%x": "%m/%d/%y",
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
        "%F": "%Y-%m-%d",
        "%T": "%H:%M:%S",
        "%c": "%a %b %e %H:%M:%S %Y",
    }

    INVERSE_TIME_MAPPING = {
        # Preserve %E6S
        # instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
        # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
        "%H:%M:%S.%f": "%H:%M:%E6S",
    }

    # Mapping of BigQuery FORMAT elements to python strftime-style directives
    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
    # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
    PSEUDOCOLUMNS = {
        "_PARTITIONTIME",
        "_PARTITIONDATE",
        "_TABLE_SUFFIX",
        "_FILE_NAME",
        "_DBT_MAX_PARTITION",
    }

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont
    COERCES_TO = {
        **TypeAnnotator.COERCES_TO,
        exp.DataType.Type.BIGDECIMAL: {exp.DataType.Type.DOUBLE},
    }
    COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
    COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
    COERCES_TO[exp.DataType.Type.VARCHAR] |= {
        exp.DataType.Type.DATE,
        exp.DataType.Type.DATETIME,
        exp.DataType.Type.TIME,
        exp.DataType.Type.TIMESTAMP,
        exp.DataType.Type.TIMESTAMPTZ,
    }

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    def normalize_identifier(self, expression: E) -> E:
        """Lowercase case-insensitive identifiers, leaving case-sensitive names intact."""
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # Dashes are allowed inside (unquoted) JSON path keys
        VAR_TOKENS = {
            TokenType.DASH,
            TokenType.VAR,
        }

    class Tokenizer(tokens.Tokenizer):
        """BigQuery lexer settings: backtick identifiers, triple-quoted strings, b/r prefixes."""

        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "WHILE": TokenType.COMMAND,
        }
        # These are not reserved/special in BigQuery, so let them tokenize as plain words
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        """BigQuery parser settings and function/statement builders."""

        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        # BigQuery does not allow ASC/DESC to be used as an identifier, allows GRANT as an identifier
        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        ALIAS_TOKENS = {
            *parser.Parser.ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        TABLE_ALIAS_TOKENS = {
            *parser.Parser.TABLE_ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        COMMENT_TABLE_ALIAS_TOKENS = {
            *parser.Parser.COMMENT_TABLE_ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        UPDATE_ALIAS_TOKENS = {
            *parser.Parser.UPDATE_ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "BIT_COUNT": exp.BitwiseCount.from_arg_list,
            "BOOL": exp.JSONBool.from_arg_list,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_DIFF": build_date_diff,
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=seq_get(args, 1),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list,
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_STRIP_NULLS": _build_json_strip_nulls,
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA1": exp.SHA1Digest.from_arg_list,
            "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
                this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
            ),
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2Digest(
                this=seq_get(args, 0), length=exp.Literal.number(256)
            ),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), to_json=True
            ),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
        }
        # Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function
        FUNCTIONS.pop("SEARCH")
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(
                exp.Array,
                expressions=[self._parse_statement()],
                struct_name_inheritance=True,
            ),
            "JSON_ARRAY": lambda self: self.expression(
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "PREDICT": lambda self: self._parse_ml(exp.Predict),
            "TRANSLATE": lambda self: self._parse_translate(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
            "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding),
            "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml(
                exp.GenerateEmbedding, is_text=True
            ),
            "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
            "FORECAST": lambda self: self._parse_ml(exp.MLForecast),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        # Tokens that terminate a dash-separated table name part (e.g. proj-name.dataset.tbl)
        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
            TokenType.DECLARE: lambda self: self._parse_declare(),
        }

        # name -> (index base, NULL-on-out-of-bounds) for array subscript operators
        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]:
            """Parse FOR ... IN ... DO; fall back to a raw Command for scripted bodies."""
            index = self._index
            this = self._parse_range()
            self._match_text_seq("DO")
            if self._match(TokenType.COMMAND):
                # The body starts with a scripting keyword we can't parse; rewind
                # and keep the whole statement as an opaque command
                self._retreat(index)
                return self._parse_as_command(self._prev)
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            """Parse a single table-name part, gluing dash-separated pieces together."""
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            """Parse a (possibly dashed/quoted) table reference, normalizing its parts."""
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            # A quoted multi-part name like `proj.db.tbl` arrives as a single identifier
            # containing dots; split it into catalog/db/this
            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            """Parse a column, splitting quoted dotted names into their parts."""
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            """Parse brackets, handling OFFSET/ORDINAL/SAFE_* subscript wrappers."""
            bracket = super()._parse_bracket(this)

            if isinstance(bracket, exp.Array):
                bracket.set("struct_name_inheritance", True)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    # Unwrap e.g. OFFSET(i) -> i now that its semantics are recorded
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            """Parse UNNEST, flagging struct-array unnests for field explosion."""
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            """Parse MAKE_INTERVAL's positional and/or named arguments."""
            expr = exp.MakeInterval()

            for arg_key in MAKE_INTERVAL_KWARGS:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_ml(self, expr_type: t.Type[E], **kwargs) -> E:
            """Parse ML.* function arguments: MODEL, optional TABLE/query, params struct."""
            self._match_text_seq("MODEL")
            this = self._parse_table()

            self._match(TokenType.COMMA)
            self._match_text_seq("TABLE")

            # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one
            expression = (
                self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None
            )

            self._match(TokenType.COMMA)

            return self.expression(
                expr_type,
                this=this,
                expression=expression,
                params_struct=self._parse_bitwise(),
                **kwargs,
            )

        def _parse_translate(self) -> exp.Translate | exp.MLTranslate:
            # Check if this is ML.TRANSLATE by looking at previous tokens
            # NOTE(review): the -4 lookback assumes the token layout `ML . TRANSLATE (` — verify
            token = seq_get(self._tokens, self._index - 4)
            if token and token.text.upper() == "ML":
                return self._parse_ml(exp.MLTranslate)

            return exp.Translate.from_arg_list(self._parse_function_args())

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            self._match(TokenType.TABLE)
            this = self._parse_table()

            expr = self.expression(exp.FeaturesAtTime, this=this)

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                #
Get the LHS of the Kwarg and set the arg to that value, e.g 1023 # "num_rows => 1" sets the expr's `num_rows` arg 1024 if arg: 1025 expr.set(arg.this.name, arg) 1026 1027 return expr 1028 1029 def _parse_vector_search(self) -> exp.VectorSearch: 1030 self._match(TokenType.TABLE) 1031 base_table = self._parse_table() 1032 1033 self._match(TokenType.COMMA) 1034 1035 column_to_search = self._parse_bitwise() 1036 self._match(TokenType.COMMA) 1037 1038 self._match(TokenType.TABLE) 1039 query_table = self._parse_table() 1040 1041 expr = self.expression( 1042 exp.VectorSearch, 1043 this=base_table, 1044 column_to_search=column_to_search, 1045 query_table=query_table, 1046 ) 1047 1048 while self._match(TokenType.COMMA): 1049 # query_column_to_search can be named argument or positional 1050 if self._match(TokenType.STRING, advance=False): 1051 query_column = self._parse_string() 1052 expr.set("query_column_to_search", query_column) 1053 else: 1054 arg = self._parse_lambda() 1055 if arg: 1056 expr.set(arg.this.name, arg) 1057 1058 return expr 1059 1060 def _parse_export_data(self) -> exp.Export: 1061 self._match_text_seq("DATA") 1062 1063 return self.expression( 1064 exp.Export, 1065 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 1066 options=self._parse_properties(), 1067 this=self._match_text_seq("AS") and self._parse_select(), 1068 ) 1069 1070 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 1071 func_index = self._index + 1 1072 this = super()._parse_column_ops(this) 1073 1074 if isinstance(this, exp.Dot) and isinstance(this.expression, exp.Func): 1075 prefix = this.this.name.upper() 1076 1077 func: t.Optional[t.Type[exp.Func]] = None 1078 if prefix == "NET": 1079 func = exp.NetFunc 1080 elif prefix == "SAFE": 1081 func = exp.SafeFunc 1082 1083 if func: 1084 # Retreat to try and parse a known function instead of an anonymous one, 1085 # which is parsed by the base column ops parser due to 
anonymous_func=true 1086 self._retreat(func_index) 1087 this = func(this=self._parse_function(any_token=True)) 1088 1089 return this 1090 1091 class Generator(generator.Generator): 1092 INTERVAL_ALLOWS_PLURAL_FORM = False 1093 JOIN_HINTS = False 1094 QUERY_HINTS = False 1095 TABLE_HINTS = False 1096 LIMIT_FETCH = "LIMIT" 1097 RENAME_TABLE_WITH_DB = False 1098 NVL2_SUPPORTED = False 1099 UNNEST_WITH_ORDINALITY = False 1100 COLLATE_IS_FUNC = True 1101 LIMIT_ONLY_LITERALS = True 1102 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1103 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1104 JSON_KEY_VALUE_PAIR_SEP = "," 1105 NULL_ORDERING_SUPPORTED = False 1106 IGNORE_NULLS_IN_FUNC = True 1107 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1108 CAN_IMPLEMENT_ARRAY_ANY = True 1109 SUPPORTS_TO_NUMBER = False 1110 NAMED_PLACEHOLDER_TOKEN = "@" 1111 HEX_FUNC = "TO_HEX" 1112 WITH_PROPERTIES_PREFIX = "OPTIONS" 1113 SUPPORTS_EXPLODING_PROJECTIONS = False 1114 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1115 SUPPORTS_UNIX_SECONDS = True 1116 DECLARE_DEFAULT_ASSIGNMENT = "DEFAULT" 1117 1118 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1119 1120 TS_OR_DS_TYPES = ( 1121 exp.TsOrDsToDatetime, 1122 exp.TsOrDsToTimestamp, 1123 exp.TsOrDsToTime, 1124 exp.TsOrDsToDate, 1125 ) 1126 1127 TRANSFORMS = { 1128 **generator.Generator.TRANSFORMS, 1129 exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"), 1130 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1131 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1132 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1133 exp.Array: inline_array_unless_query, 1134 exp.ArrayContains: _array_contains_sql, 1135 exp.ArrayFilter: filter_array_using_unnest, 1136 exp.ArrayRemove: filter_array_using_unnest, 1137 exp.BitwiseAndAgg: rename_func("BIT_AND"), 1138 exp.BitwiseOrAgg: rename_func("BIT_OR"), 1139 exp.BitwiseXorAgg: rename_func("BIT_XOR"), 1140 exp.BitwiseCount: rename_func("BIT_COUNT"), 1141 exp.ByteLength: rename_func("BYTE_LENGTH"), 1142 exp.Cast: 
transforms.preprocess([transforms.remove_precision_parameterized_types]), 1143 exp.CollateProperty: lambda self, e: ( 1144 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1145 if e.args.get("default") 1146 else f"COLLATE {self.sql(e, 'this')}" 1147 ), 1148 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1149 exp.CountIf: rename_func("COUNTIF"), 1150 exp.Create: _create_sql, 1151 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1152 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1153 exp.DateDiff: lambda self, e: self.func( 1154 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1155 ), 1156 exp.DateFromParts: rename_func("DATE"), 1157 exp.DateStrToDate: datestrtodate_sql, 1158 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1159 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1160 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1161 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1162 exp.FromTimeZone: lambda self, e: self.func( 1163 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1164 ), 1165 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1166 exp.GroupConcat: lambda self, e: groupconcat_sql( 1167 self, e, func_name="STRING_AGG", within_group=False, sep=None 1168 ), 1169 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1170 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1171 exp.If: if_sql(false_value="NULL"), 1172 exp.ILike: no_ilike_sql, 1173 exp.IntDiv: rename_func("DIV"), 1174 exp.Int64: rename_func("INT64"), 1175 exp.JSONBool: rename_func("BOOL"), 1176 exp.JSONExtract: _json_extract_sql, 1177 exp.JSONExtractArray: _json_extract_sql, 1178 exp.JSONExtractScalar: _json_extract_sql, 1179 exp.JSONFormat: lambda self, e: self.func( 1180 "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING", 1181 e.this, 1182 e.args.get("options"), 1183 ), 1184 exp.JSONKeysAtDepth: rename_func("JSON_KEYS"), 1185 exp.JSONValueArray: 
rename_func("JSON_VALUE_ARRAY"), 1186 exp.Levenshtein: _levenshtein_sql, 1187 exp.Max: max_or_greatest, 1188 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1189 exp.MD5Digest: rename_func("MD5"), 1190 exp.Min: min_or_least, 1191 exp.Normalize: lambda self, e: self.func( 1192 "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE", 1193 e.this, 1194 e.args.get("form"), 1195 ), 1196 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1197 exp.RegexpExtract: lambda self, e: self.func( 1198 "REGEXP_EXTRACT", 1199 e.this, 1200 e.expression, 1201 e.args.get("position"), 1202 e.args.get("occurrence"), 1203 ), 1204 exp.RegexpExtractAll: lambda self, e: self.func( 1205 "REGEXP_EXTRACT_ALL", e.this, e.expression 1206 ), 1207 exp.RegexpReplace: regexp_replace_sql, 1208 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1209 exp.ReturnsProperty: _returnsproperty_sql, 1210 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1211 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1212 exp.ParseDatetime: lambda self, e: self.func( 1213 "PARSE_DATETIME", self.format_time(e), e.this 1214 ), 1215 exp.Select: transforms.preprocess( 1216 [ 1217 transforms.explode_projection_to_unnest(), 1218 transforms.unqualify_unnest, 1219 transforms.eliminate_distinct_on, 1220 _alias_ordered_group, 1221 transforms.eliminate_semi_and_anti_joins, 1222 ] 1223 ), 1224 exp.SHA: rename_func("SHA1"), 1225 exp.SHA2: sha256_sql, 1226 exp.SHA1Digest: rename_func("SHA1"), 1227 exp.SHA2Digest: sha2_digest_sql, 1228 exp.StabilityProperty: lambda self, e: ( 1229 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1230 ), 1231 exp.String: rename_func("STRING"), 1232 exp.StrPosition: lambda self, e: ( 1233 strposition_sql( 1234 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1235 ) 1236 ), 1237 exp.StrToDate: _str_to_datetime_sql, 1238 exp.StrToTime: _str_to_datetime_sql, 
1239 exp.SessionUser: lambda *_: "SESSION_USER()", 1240 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1241 exp.TimeFromParts: rename_func("TIME"), 1242 exp.TimestampFromParts: rename_func("DATETIME"), 1243 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1244 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1245 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1246 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1247 exp.TimeStrToTime: timestrtotime_sql, 1248 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1249 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1250 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1251 exp.TsOrDsToTime: rename_func("TIME"), 1252 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1253 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1254 exp.Unhex: rename_func("FROM_HEX"), 1255 exp.UnixDate: rename_func("UNIX_DATE"), 1256 exp.UnixToTime: _unix_to_time_sql, 1257 exp.Uuid: lambda *_: "GENERATE_UUID()", 1258 exp.Values: _derived_table_values_to_unnest, 1259 exp.VariancePop: rename_func("VAR_POP"), 1260 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1261 } 1262 1263 SUPPORTED_JSON_PATH_PARTS = { 1264 exp.JSONPathKey, 1265 exp.JSONPathRoot, 1266 exp.JSONPathSubscript, 1267 } 1268 1269 TYPE_MAPPING = { 1270 **generator.Generator.TYPE_MAPPING, 1271 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1272 exp.DataType.Type.BIGINT: "INT64", 1273 exp.DataType.Type.BINARY: "BYTES", 1274 exp.DataType.Type.BLOB: "BYTES", 1275 exp.DataType.Type.BOOLEAN: "BOOL", 1276 exp.DataType.Type.CHAR: "STRING", 1277 exp.DataType.Type.DECIMAL: "NUMERIC", 1278 exp.DataType.Type.DOUBLE: "FLOAT64", 1279 exp.DataType.Type.FLOAT: "FLOAT64", 1280 exp.DataType.Type.INT: "INT64", 1281 exp.DataType.Type.NCHAR: "STRING", 1282 exp.DataType.Type.NVARCHAR: "STRING", 1283 exp.DataType.Type.SMALLINT: "INT64", 1284 exp.DataType.Type.TEXT: "STRING", 1285 exp.DataType.Type.TIMESTAMP: "DATETIME", 1286 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1287 exp.DataType.Type.TIMESTAMPTZ: 
"TIMESTAMP", 1288 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1289 exp.DataType.Type.TINYINT: "INT64", 1290 exp.DataType.Type.ROWVERSION: "BYTES", 1291 exp.DataType.Type.UUID: "STRING", 1292 exp.DataType.Type.VARBINARY: "BYTES", 1293 exp.DataType.Type.VARCHAR: "STRING", 1294 exp.DataType.Type.VARIANT: "ANY TYPE", 1295 } 1296 1297 PROPERTIES_LOCATION = { 1298 **generator.Generator.PROPERTIES_LOCATION, 1299 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1300 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1301 } 1302 1303 # WINDOW comes after QUALIFY 1304 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1305 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1306 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1307 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1308 } 1309 1310 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1311 RESERVED_KEYWORDS = { 1312 "all", 1313 "and", 1314 "any", 1315 "array", 1316 "as", 1317 "asc", 1318 "assert_rows_modified", 1319 "at", 1320 "between", 1321 "by", 1322 "case", 1323 "cast", 1324 "collate", 1325 "contains", 1326 "create", 1327 "cross", 1328 "cube", 1329 "current", 1330 "default", 1331 "define", 1332 "desc", 1333 "distinct", 1334 "else", 1335 "end", 1336 "enum", 1337 "escape", 1338 "except", 1339 "exclude", 1340 "exists", 1341 "extract", 1342 "false", 1343 "fetch", 1344 "following", 1345 "for", 1346 "from", 1347 "full", 1348 "group", 1349 "grouping", 1350 "groups", 1351 "hash", 1352 "having", 1353 "if", 1354 "ignore", 1355 "in", 1356 "inner", 1357 "intersect", 1358 "interval", 1359 "into", 1360 "is", 1361 "join", 1362 "lateral", 1363 "left", 1364 "like", 1365 "limit", 1366 "lookup", 1367 "merge", 1368 "natural", 1369 "new", 1370 "no", 1371 "not", 1372 "null", 1373 "nulls", 1374 "of", 1375 "on", 1376 "or", 1377 "order", 1378 "outer", 1379 "over", 1380 "partition", 1381 
"preceding", 1382 "proto", 1383 "qualify", 1384 "range", 1385 "recursive", 1386 "respect", 1387 "right", 1388 "rollup", 1389 "rows", 1390 "select", 1391 "set", 1392 "some", 1393 "struct", 1394 "tablesample", 1395 "then", 1396 "to", 1397 "treat", 1398 "true", 1399 "unbounded", 1400 "union", 1401 "unnest", 1402 "using", 1403 "when", 1404 "where", 1405 "window", 1406 "with", 1407 "within", 1408 } 1409 1410 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1411 unit = expression.unit 1412 unit_sql = unit.name if unit.is_string else self.sql(unit) 1413 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1414 1415 def mod_sql(self, expression: exp.Mod) -> str: 1416 this = expression.this 1417 expr = expression.expression 1418 return self.func( 1419 "MOD", 1420 this.unnest() if isinstance(this, exp.Paren) else this, 1421 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1422 ) 1423 1424 def column_parts(self, expression: exp.Column) -> str: 1425 if expression.meta.get("quoted_column"): 1426 # If a column reference is of the form `dataset.table`.name, we need 1427 # to preserve the quoted table path, otherwise the reference breaks 1428 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1429 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1430 return f"{table_path}.{self.sql(expression, 'this')}" 1431 1432 return super().column_parts(expression) 1433 1434 def table_parts(self, expression: exp.Table) -> str: 1435 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1436 # we need to make sure the correct quoting is used in each case. 
1437 # 1438 # For example, if there is a CTE x that clashes with a schema name, then the former will 1439 # return the table y in that schema, whereas the latter will return the CTE's y column: 1440 # 1441 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1442 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1443 if expression.meta.get("quoted_table"): 1444 table_parts = ".".join(p.name for p in expression.parts) 1445 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1446 1447 return super().table_parts(expression) 1448 1449 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1450 this = expression.this 1451 if isinstance(this, exp.TsOrDsToDatetime): 1452 func_name = "FORMAT_DATETIME" 1453 elif isinstance(this, exp.TsOrDsToTimestamp): 1454 func_name = "FORMAT_TIMESTAMP" 1455 elif isinstance(this, exp.TsOrDsToTime): 1456 func_name = "FORMAT_TIME" 1457 else: 1458 func_name = "FORMAT_DATE" 1459 1460 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1461 return self.func( 1462 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1463 ) 1464 1465 def eq_sql(self, expression: exp.EQ) -> str: 1466 # Operands of = cannot be NULL in BigQuery 1467 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1468 if not isinstance(expression.parent, exp.Update): 1469 return "NULL" 1470 1471 return self.binary(expression, "=") 1472 1473 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1474 parent = expression.parent 1475 1476 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1477 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1478 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1479 return self.func( 1480 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1481 ) 1482 1483 return super().attimezone_sql(expression) 1484 1485 def trycast_sql(self, expression: exp.TryCast) -> str: 1486 return self.cast_sql(expression, safe_prefix="SAFE_") 1487 1488 def bracket_sql(self, expression: exp.Bracket) -> str: 1489 this = expression.this 1490 expressions = expression.expressions 1491 1492 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1493 arg = expressions[0] 1494 if arg.type is None: 1495 from sqlglot.optimizer.annotate_types import annotate_types 1496 1497 arg = annotate_types(arg, dialect=self.dialect) 1498 1499 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1500 # BQ doesn't support bracket syntax with string values for structs 1501 return f"{self.sql(this)}.{arg.name}" 1502 1503 expressions_sql = self.expressions(expression, flat=True) 1504 offset = expression.args.get("offset") 1505 1506 if offset == 0: 1507 expressions_sql = f"OFFSET({expressions_sql})" 1508 elif offset == 1: 1509 expressions_sql = f"ORDINAL({expressions_sql})" 1510 elif offset is not None: 1511 self.unsupported(f"Unsupported array offset: {offset}") 1512 1513 if expression.args.get("safe"): 1514 expressions_sql = f"SAFE_{expressions_sql}" 1515 1516 return f"{self.sql(this)}[{expressions_sql}]" 1517 1518 def in_unnest_op(self, expression: exp.Unnest) -> str: 1519 return self.sql(expression) 1520 1521 def version_sql(self, expression: exp.Version) -> str: 1522 if expression.name == "TIMESTAMP": 1523 expression.set("this", "SYSTEM_TIME") 1524 return super().version_sql(expression) 1525 1526 def contains_sql(self, expression: exp.Contains) -> str: 1527 this = expression.this 1528 expr = expression.expression 1529 1530 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1531 this = this.this 1532 expr = expr.this 1533 1534 
return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1535 1536 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1537 this = expression.this 1538 1539 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1540 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1541 # because they aren't literals and so the above syntax is invalid BigQuery. 1542 if isinstance(this, exp.Array): 1543 elem = seq_get(this.expressions, 0) 1544 if not (elem and elem.find(exp.Query)): 1545 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1546 1547 return super().cast_sql(expression, safe_prefix=safe_prefix)
def build_date_diff(args: t.List) -> exp.Expression:
    """Build an exp.DateDiff from BigQuery DATE_DIFF arguments.

    A bare WEEK unit is canonicalized to WEEK(SUNDAY) so the BigQuery
    semantic stays explicit in the AST, which facilitates transpilation.
    The normalization runs after constructing exp.DateDiff because the
    TimeUnit mixin also performs canonicalizations in its constructor.
    """
    date_diff = exp.DateDiff(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        unit=seq_get(args, 2),
        date_part_boundary=True,
    )

    normalized_unit = date_diff.args.get("unit")
    if isinstance(normalized_unit, exp.Var) and normalized_unit.name.upper() == "WEEK":
        date_diff.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))

    return date_diff
375class BigQuery(Dialect): 376 WEEK_OFFSET = -1 377 UNNEST_COLUMN_ONLY = True 378 SUPPORTS_USER_DEFINED_TYPES = False 379 SUPPORTS_SEMI_ANTI_JOIN = False 380 LOG_BASE_FIRST = False 381 HEX_LOWERCASE = True 382 FORCE_EARLY_ALIAS_REF_EXPANSION = True 383 EXPAND_ONLY_GROUP_ALIAS_REF = True 384 PRESERVE_ORIGINAL_NAMES = True 385 HEX_STRING_IS_INTEGER_TYPE = True 386 BYTE_STRING_IS_BYTES_TYPE = True 387 UUID_IS_STRING_TYPE = True 388 ANNOTATE_ALL_SCOPES = True 389 PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True 390 TABLES_REFERENCEABLE_AS_COLUMNS = True 391 SUPPORTS_STRUCT_STAR_EXPANSION = True 392 EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True 393 QUERY_RESULTS_ARE_STRUCTS = True 394 JSON_EXTRACT_SCALAR_SCALAR_ONLY = True 395 LEAST_GREATEST_IGNORES_NULLS = False 396 DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT 397 PRIORITIZE_NON_LITERAL_TYPES = True 398 ALIAS_POST_VERSION = False 399 400 # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap 401 INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-' 402 403 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 404 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 405 406 # bigquery udfs are case sensitive 407 NORMALIZE_FUNCTIONS = False 408 409 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 410 TIME_MAPPING = { 411 "%x": "%m/%d/%y", 412 "%D": "%m/%d/%y", 413 "%E6S": "%S.%f", 414 "%e": "%-d", 415 "%F": "%Y-%m-%d", 416 "%T": "%H:%M:%S", 417 "%c": "%a %b %e %H:%M:%S %Y", 418 } 419 420 INVERSE_TIME_MAPPING = { 421 # Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery 422 # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse. 
423 "%H:%M:%S.%f": "%H:%M:%E6S", 424 } 425 426 FORMAT_MAPPING = { 427 "DD": "%d", 428 "MM": "%m", 429 "MON": "%b", 430 "MONTH": "%B", 431 "YYYY": "%Y", 432 "YY": "%y", 433 "HH": "%I", 434 "HH12": "%I", 435 "HH24": "%H", 436 "MI": "%M", 437 "SS": "%S", 438 "SSSSS": "%f", 439 "TZH": "%z", 440 } 441 442 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 443 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 444 # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix 445 # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column 446 PSEUDOCOLUMNS = { 447 "_PARTITIONTIME", 448 "_PARTITIONDATE", 449 "_TABLE_SUFFIX", 450 "_FILE_NAME", 451 "_DBT_MAX_PARTITION", 452 } 453 454 # All set operations require either a DISTINCT or ALL specifier 455 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 456 457 # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont 458 COERCES_TO = { 459 **TypeAnnotator.COERCES_TO, 460 exp.DataType.Type.BIGDECIMAL: {exp.DataType.Type.DOUBLE}, 461 } 462 COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL} 463 COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL} 464 COERCES_TO[exp.DataType.Type.VARCHAR] |= { 465 exp.DataType.Type.DATE, 466 exp.DataType.Type.DATETIME, 467 exp.DataType.Type.TIME, 468 exp.DataType.Type.TIMESTAMP, 469 exp.DataType.Type.TIMESTAMPTZ, 470 } 471 472 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 473 474 def normalize_identifier(self, expression: E) -> E: 475 if ( 476 isinstance(expression, exp.Identifier) 477 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 478 ): 479 parent = expression.parent 480 while isinstance(parent, exp.Dot): 481 parent = parent.parent 482 483 # In BigQuery, 
CTEs are case-insensitive, but UDF and table names are case-sensitive 484 # by default. The following check uses a heuristic to detect tables based on whether 485 # they are qualified. This should generally be correct, because tables in BigQuery 486 # must be qualified with at least a dataset, unless @@dataset_id is set. 487 case_sensitive = ( 488 isinstance(parent, exp.UserDefinedFunction) 489 or ( 490 isinstance(parent, exp.Table) 491 and parent.db 492 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 493 ) 494 or expression.meta.get("is_table") 495 ) 496 if not case_sensitive: 497 expression.set("this", expression.this.lower()) 498 499 return t.cast(E, expression) 500 501 return super().normalize_identifier(expression) 502 503 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 504 VAR_TOKENS = { 505 TokenType.DASH, 506 TokenType.VAR, 507 } 508 509 class Tokenizer(tokens.Tokenizer): 510 QUOTES = ["'", '"', '"""', "'''"] 511 COMMENTS = ["--", "#", ("/*", "*/")] 512 IDENTIFIERS = ["`"] 513 STRING_ESCAPES = ["\\"] 514 515 HEX_STRINGS = [("0x", ""), ("0X", "")] 516 517 BYTE_STRINGS = [ 518 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 519 ] 520 521 RAW_STRINGS = [ 522 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 523 ] 524 525 NESTED_COMMENTS = False 526 527 KEYWORDS = { 528 **tokens.Tokenizer.KEYWORDS, 529 "ANY TYPE": TokenType.VARIANT, 530 "BEGIN": TokenType.COMMAND, 531 "BEGIN TRANSACTION": TokenType.BEGIN, 532 "BYTEINT": TokenType.INT, 533 "BYTES": TokenType.BINARY, 534 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 535 "DATETIME": TokenType.TIMESTAMP, 536 "DECLARE": TokenType.DECLARE, 537 "ELSEIF": TokenType.COMMAND, 538 "EXCEPTION": TokenType.COMMAND, 539 "EXPORT": TokenType.EXPORT, 540 "FLOAT64": TokenType.DOUBLE, 541 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 542 "LOOP": TokenType.COMMAND, 543 "MODEL": TokenType.MODEL, 544 "NOT DETERMINISTIC": 
TokenType.VOLATILE, 545 "RECORD": TokenType.STRUCT, 546 "REPEAT": TokenType.COMMAND, 547 "TIMESTAMP": TokenType.TIMESTAMPTZ, 548 "WHILE": TokenType.COMMAND, 549 } 550 KEYWORDS.pop("DIV") 551 KEYWORDS.pop("VALUES") 552 KEYWORDS.pop("/*+") 553 554 class Parser(parser.Parser): 555 PREFIXED_PIVOT_COLUMNS = True 556 LOG_DEFAULTS_TO_LN = True 557 SUPPORTS_IMPLICIT_UNNEST = True 558 JOINS_HAVE_EQUAL_PRECEDENCE = True 559 560 # BigQuery does not allow ASC/DESC to be used as an identifier, allows GRANT as an identifier 561 ID_VAR_TOKENS = { 562 *parser.Parser.ID_VAR_TOKENS, 563 TokenType.GRANT, 564 } - {TokenType.ASC, TokenType.DESC} 565 566 ALIAS_TOKENS = { 567 *parser.Parser.ALIAS_TOKENS, 568 TokenType.GRANT, 569 } - {TokenType.ASC, TokenType.DESC} 570 571 TABLE_ALIAS_TOKENS = { 572 *parser.Parser.TABLE_ALIAS_TOKENS, 573 TokenType.GRANT, 574 } - {TokenType.ASC, TokenType.DESC} 575 576 COMMENT_TABLE_ALIAS_TOKENS = { 577 *parser.Parser.COMMENT_TABLE_ALIAS_TOKENS, 578 TokenType.GRANT, 579 } - {TokenType.ASC, TokenType.DESC} 580 581 UPDATE_ALIAS_TOKENS = { 582 *parser.Parser.UPDATE_ALIAS_TOKENS, 583 TokenType.GRANT, 584 } - {TokenType.ASC, TokenType.DESC} 585 586 FUNCTIONS = { 587 **parser.Parser.FUNCTIONS, 588 "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list, 589 "BIT_AND": exp.BitwiseAndAgg.from_arg_list, 590 "BIT_OR": exp.BitwiseOrAgg.from_arg_list, 591 "BIT_XOR": exp.BitwiseXorAgg.from_arg_list, 592 "BIT_COUNT": exp.BitwiseCount.from_arg_list, 593 "BOOL": exp.JSONBool.from_arg_list, 594 "CONTAINS_SUBSTR": _build_contains_substring, 595 "DATE": _build_date, 596 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 597 "DATE_DIFF": build_date_diff, 598 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 599 "DATE_TRUNC": lambda args: exp.DateTrunc( 600 unit=seq_get(args, 1), 601 this=seq_get(args, 0), 602 zone=seq_get(args, 2), 603 ), 604 "DATETIME": _build_datetime, 605 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 606 "DATETIME_SUB": 
build_date_delta_with_interval(exp.DatetimeSub), 607 "DIV": binary_from_function(exp.IntDiv), 608 "EDIT_DISTANCE": _build_levenshtein, 609 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 610 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 611 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 612 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 613 "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 614 "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list, 615 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 616 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 617 "JSON_STRIP_NULLS": _build_json_strip_nulls, 618 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 619 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 620 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 621 "MD5": exp.MD5Digest.from_arg_list, 622 "SHA1": exp.SHA1Digest.from_arg_list, 623 "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize( 624 this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True 625 ), 626 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 627 "TO_HEX": _build_to_hex, 628 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 629 [seq_get(args, 1), seq_get(args, 0)] 630 ), 631 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 632 [seq_get(args, 1), seq_get(args, 0)] 633 ), 634 "PARSE_TIMESTAMP": _build_parse_timestamp, 635 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 636 [seq_get(args, 1), seq_get(args, 0)] 637 ), 638 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 639 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 640 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 641 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 642 exp.RegexpExtractAll, 
default_group=exp.Literal.number(0) 643 ), 644 "SHA256": lambda args: exp.SHA2Digest( 645 this=seq_get(args, 0), length=exp.Literal.number(256) 646 ), 647 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 648 "SPLIT": lambda args: exp.Split( 649 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 650 this=seq_get(args, 0), 651 expression=seq_get(args, 1) or exp.Literal.string(","), 652 ), 653 "STRPOS": exp.StrPosition.from_arg_list, 654 "TIME": _build_time, 655 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 656 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 657 "TIMESTAMP": _build_timestamp, 658 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 659 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 660 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 661 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 662 ), 663 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 664 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 665 ), 666 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 667 "TO_JSON": lambda args: exp.JSONFormat( 668 this=seq_get(args, 0), options=seq_get(args, 1), to_json=True 669 ), 670 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 671 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 672 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 673 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 674 "FROM_HEX": exp.Unhex.from_arg_list, 675 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 676 } 677 # Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function 678 FUNCTIONS.pop("SEARCH") 679 680 FUNCTION_PARSERS = { 681 **parser.Parser.FUNCTION_PARSERS, 682 "ARRAY": lambda self: self.expression( 683 exp.Array, 684 expressions=[self._parse_statement()], 685 struct_name_inheritance=True, 686 ), 687 "JSON_ARRAY": lambda self: self.expression( 
688 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 689 ), 690 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 691 "PREDICT": lambda self: self._parse_ml(exp.Predict), 692 "TRANSLATE": lambda self: self._parse_translate(), 693 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 694 "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding), 695 "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml( 696 exp.GenerateEmbedding, is_text=True 697 ), 698 "VECTOR_SEARCH": lambda self: self._parse_vector_search(), 699 "FORECAST": lambda self: self._parse_ml(exp.MLForecast), 700 } 701 FUNCTION_PARSERS.pop("TRIM") 702 703 NO_PAREN_FUNCTIONS = { 704 **parser.Parser.NO_PAREN_FUNCTIONS, 705 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 706 } 707 708 NESTED_TYPE_TOKENS = { 709 *parser.Parser.NESTED_TYPE_TOKENS, 710 TokenType.TABLE, 711 } 712 713 PROPERTY_PARSERS = { 714 **parser.Parser.PROPERTY_PARSERS, 715 "NOT DETERMINISTIC": lambda self: self.expression( 716 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 717 ), 718 "OPTIONS": lambda self: self._parse_with_property(), 719 } 720 721 CONSTRAINT_PARSERS = { 722 **parser.Parser.CONSTRAINT_PARSERS, 723 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 724 } 725 726 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 727 RANGE_PARSERS.pop(TokenType.OVERLAPS) 728 729 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 730 731 STATEMENT_PARSERS = { 732 **parser.Parser.STATEMENT_PARSERS, 733 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 734 TokenType.END: lambda self: self._parse_as_command(self._prev), 735 TokenType.FOR: lambda self: self._parse_for_in(), 736 TokenType.EXPORT: lambda self: self._parse_export_data(), 737 TokenType.DECLARE: lambda self: self._parse_declare(), 738 } 739 740 BRACKET_OFFSETS = { 741 "OFFSET": (0, False), 742 "ORDINAL": (1, False), 743 "SAFE_OFFSET": (0, 
True), 744 "SAFE_ORDINAL": (1, True), 745 } 746 747 def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]: 748 index = self._index 749 this = self._parse_range() 750 self._match_text_seq("DO") 751 if self._match(TokenType.COMMAND): 752 self._retreat(index) 753 return self._parse_as_command(self._prev) 754 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 755 756 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 757 this = super()._parse_table_part(schema=schema) or self._parse_number() 758 759 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 760 if isinstance(this, exp.Identifier): 761 table_name = this.name 762 while self._match(TokenType.DASH, advance=False) and self._next: 763 start = self._curr 764 while self._is_connected() and not self._match_set( 765 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 766 ): 767 self._advance() 768 769 if start == self._curr: 770 break 771 772 table_name += self._find_sql(start, self._prev) 773 774 this = exp.Identifier( 775 this=table_name, quoted=this.args.get("quoted") 776 ).update_positions(this) 777 elif isinstance(this, exp.Literal): 778 table_name = this.name 779 780 if self._is_connected() and self._parse_var(any_token=True): 781 table_name += self._prev.text 782 783 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 784 785 return this 786 787 def _parse_table_parts( 788 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 789 ) -> exp.Table: 790 table = super()._parse_table_parts( 791 schema=schema, is_db_reference=is_db_reference, wildcard=True 792 ) 793 794 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 795 if not table.catalog: 796 if table.db: 797 previous_db = table.args["db"] 798 parts = table.db.split(".") 799 if len(parts) == 2 and not table.args["db"].quoted: 800 table.set( 801 "catalog", 
exp.Identifier(this=parts[0]).update_positions(previous_db) 802 ) 803 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 804 else: 805 previous_this = table.this 806 parts = table.name.split(".") 807 if len(parts) == 2 and not table.this.quoted: 808 table.set( 809 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 810 ) 811 table.set( 812 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 813 ) 814 815 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 816 alias = table.this 817 catalog, db, this, *rest = ( 818 exp.to_identifier(p, quoted=True) 819 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 820 ) 821 822 for part in (catalog, db, this): 823 if part: 824 part.update_positions(table.this) 825 826 if rest and this: 827 this = exp.Dot.build([this, *rest]) # type: ignore 828 829 table = exp.Table( 830 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 831 ) 832 table.meta["quoted_table"] = True 833 else: 834 alias = None 835 836 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 837 # dataset, so if the project identifier is omitted we need to fix the ast so that 838 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 839 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 840 # views, because it would seem like the "catalog" part is set, when it'd actually 841 # be the region/dataset. Merging the two identifiers into a single one is done to 842 # avoid producing a 4-part Table reference, which would cause issues in the schema 843 # module, when there are 3-part table names mixed with information schema views. 
844 # 845 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 846 table_parts = table.parts 847 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 848 # We need to alias the table here to avoid breaking existing qualified columns. 849 # This is expected to be safe, because if there's an actual alias coming up in 850 # the token stream, it will overwrite this one. If there isn't one, we are only 851 # exposing the name that can be used to reference the view explicitly (a no-op). 852 exp.alias_( 853 table, 854 t.cast(exp.Identifier, alias or table_parts[-1]), 855 table=True, 856 copy=False, 857 ) 858 859 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 860 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 861 line=table_parts[-2].meta.get("line"), 862 col=table_parts[-1].meta.get("col"), 863 start=table_parts[-2].meta.get("start"), 864 end=table_parts[-1].meta.get("end"), 865 ) 866 table.set("this", new_this) 867 table.set("db", seq_get(table_parts, -3)) 868 table.set("catalog", seq_get(table_parts, -4)) 869 870 return table 871 872 def _parse_column(self) -> t.Optional[exp.Expression]: 873 column = super()._parse_column() 874 if isinstance(column, exp.Column): 875 parts = column.parts 876 if any("." in p.name for p in parts): 877 catalog, db, table, this, *rest = ( 878 exp.to_identifier(p, quoted=True) 879 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 880 ) 881 882 if rest and this: 883 this = exp.Dot.build([this, *rest]) # type: ignore 884 885 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 886 column.meta["quoted_column"] = True 887 888 return column 889 890 @t.overload 891 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 892 893 @t.overload 894 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT, normalizing BigQuery's two-array form.

        BigQuery's "signature 2" passes all keys in one array and all values in
        another; this converts that shape into SQLGlot's canonical list of
        `exp.JSONKeyValue` pairs so downstream code sees a single representation.
        """
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            # zip truncates to the shorter array; extra keys/values are dropped silently
            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        """Parse bracket syntax, handling BigQuery's OFFSET/ORDINAL/SAFE_* wrappers.

        Array literals parsed via brackets are flagged for struct name
        inheritance. Subscripts like `arr[OFFSET(i)]` are unwrapped into the
        bare index expression, with the wrapper encoded in the node's
        `offset`/`safe` args (see BRACKET_OFFSETS).
        """
        bracket = super()._parse_bracket(this)

        if isinstance(bracket, exp.Array):
            bracket.set("struct_name_inheritance", True)

        # Base parser returned the input unchanged -> no bracket was consumed
        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                # Replace e.g. OFFSET(i) with just i; the wrapper is now in the args
                expression.replace(expression.expressions[0])

        return bracket

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST, marking arrays of structs for struct-field explosion."""
        unnest = super()._parse_unnest(with_alias=with_alias)

        if not unnest:
            return None

        unnest_expr = seq_get(unnest.expressions, 0)
        if unnest_expr:
            # Local import to avoid a circular dependency at module load time
            from sqlglot.optimizer.annotate_types import annotate_types

            unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

            # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
            # in contrast to other dialects such as DuckDB which flattens only the array by default
            if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                array_elem.is_type(exp.DataType.Type.STRUCT)
                for array_elem in unnest_expr._type.expressions
            ):
                unnest.set("explode_array", True)

        return unnest

    def _parse_make_interval(self) -> exp.MakeInterval:
        """Parse MAKE_INTERVAL(...) with positional and/or named arguments."""
        expr = exp.MakeInterval()

        for arg_key in MAKE_INTERVAL_KWARGS:
            value = self._parse_lambda()

            if not value:
                break

            # Non-named arguments are filled sequentially, (optionally) followed by named arguments
            # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
            if isinstance(value, exp.Kwarg):
                arg_key = value.this.name

            expr.set(arg_key, value)

            self._match(TokenType.COMMA)

        return expr

    def _parse_ml(self, expr_type: t.Type[E], **kwargs) -> E:
        """Parse the shared argument shape of BigQuery ML.* table functions.

        Consumes `MODEL <table>`, an optional `TABLE <table>`/query argument,
        and a trailing params STRUCT, building an `expr_type` node.
        """
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one
        expression = (
            self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None
        )

        self._match(TokenType.COMMA)

        return self.expression(
            expr_type,
            this=this,
            expression=expression,
            params_struct=self._parse_bitwise(),
            **kwargs,
        )

    def _parse_translate(self) -> exp.Translate | exp.MLTranslate:
        """Disambiguate ML.TRANSLATE from the plain TRANSLATE string function."""
        # Check if this is ML.TRANSLATE by looking at previous tokens
        # NOTE(review): assumes the "ML" token sits exactly 4 positions back
        # (ML . TRANSLATE ( ) when this parser fires — confirm against tokenizer
        token = seq_get(self._tokens, self._index - 4)
        if token and token.text.upper() == "ML":
            return self._parse_ml(exp.MLTranslate)

        return exp.Translate.from_arg_list(self._parse_function_args())

    def _parse_features_at_time(self) -> exp.FeaturesAtTime:
        """Parse ML.FEATURES_AT_TIME(TABLE t, name => value, ...)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        expr = self.expression(exp.FeaturesAtTime, this=this)

        while self._match(TokenType.COMMA):
            arg = self._parse_lambda()

            # Get the LHS of the Kwarg and set the arg to that value, e.g
            # "num_rows => 1" sets the expr's `num_rows` arg
            if arg:
                expr.set(arg.this.name, arg)

        return expr

    def _parse_vector_search(self) -> exp.VectorSearch:
        """Parse VECTOR_SEARCH(TABLE base, column, TABLE query, ...)."""
        self._match(TokenType.TABLE)
        base_table = self._parse_table()

        self._match(TokenType.COMMA)

        column_to_search = self._parse_bitwise()
        self._match(TokenType.COMMA)

        self._match(TokenType.TABLE)
        query_table = self._parse_table()

        expr = self.expression(
            exp.VectorSearch,
            this=base_table,
            column_to_search=column_to_search,
            query_table=query_table,
        )

        while self._match(TokenType.COMMA):
            # query_column_to_search can be named argument or positional
            if self._match(TokenType.STRING, advance=False):
                query_column = self._parse_string()
                expr.set("query_column_to_search", query_column)
            else:
                arg = self._parse_lambda()
                if arg:
                    expr.set(arg.this.name, arg)

        return expr

    def _parse_export_data(self) -> exp.Export:
        """Parse EXPORT DATA [WITH CONNECTION ...] [OPTIONS (...)] AS <select>."""
        self._match_text_seq("DATA")

        return self.expression(
            exp.Export,
            connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
            options=self._parse_properties(),
            this=self._match_text_seq("AS") and self._parse_select(),
        )

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Re-parse NET.* / SAFE.* prefixed calls as known (non-anonymous) functions."""
        func_index = self._index + 1
        this = super()._parse_column_ops(this)

        if isinstance(this, exp.Dot) and isinstance(this.expression, exp.Func):
            prefix = this.this.name.upper()

            func: t.Optional[t.Type[exp.Func]] = None
            if prefix == "NET":
                func = exp.NetFunc
            elif prefix == "SAFE":
                func = exp.SafeFunc

            if func:
                # Retreat to try and parse a known function instead of an anonymous one,
                # which is parsed by the base column ops parser due to anonymous_func=true
                self._retreat(func_index)
                this = func(this=self._parse_function(any_token=True))

        return this

    class Generator(generator.Generator):
        """Generates BigQuery (GoogleSQL) syntax from SQLGlot expression trees."""

        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True
        DECLARE_DEFAULT_ASSIGNMENT = "DEFAULT"

        # Identifier-like JSON path keys (letters/digits/underscore/dash) that
        # can be emitted without quoting
        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")

        # Wrapper nodes whose inner expression carries the time value; used by
        # timetostr_sql to pick the matching FORMAT_* function
        TS_OR_DS_TYPES = (
            exp.TsOrDsToDatetime,
            exp.TsOrDsToTimestamp,
            exp.TsOrDsToTime,
            exp.TsOrDsToDate,
        )

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayRemove: filter_array_using_unnest,
            exp.BitwiseAndAgg: rename_func("BIT_AND"),
            exp.BitwiseOrAgg: rename_func("BIT_OR"),
            exp.BitwiseXorAgg: rename_func("BIT_XOR"),
            exp.BitwiseCount: rename_func("BIT_COUNT"),
            exp.ByteLength: rename_func("BYTE_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False, sep=None
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONBool: rename_func("BOOL"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: lambda self, e: self.func(
                "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING",
                e.this,
                e.args.get("options"),
            ),
            exp.JSONKeysAtDepth: rename_func("JSON_KEYS"),
            exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.Normalize: lambda self, e: self.func(
                "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE",
                e.this,
                e.args.get("form"),
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this),
            exp.ParseDatetime: lambda self, e: self.func(
                "PARSE_DATETIME", self.format_time(e), e.this
            ),
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2Digest: sha2_digest_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.SessionUser: lambda *_: "SESSION_USER()",
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BLOB: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
            """Render DATE_TRUNC, unquoting string units and passing the zone through."""
            unit = expression.unit
            unit_sql = unit.name if unit.is_string else self.sql(unit)
            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

        def mod_sql(self, expression: exp.Mod) -> str:
            """Render `%` as MOD(a, b), stripping redundant parentheses from the operands."""
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            """Render a column, re-quoting the full table path when it was parsed as one identifier."""
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            """Render a table name, re-quoting the full dotted path when it was one identifier."""
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            """Pick the FORMAT_{DATETIME,TIMESTAMP,TIME,DATE} variant by input wrapper type."""
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            elif isinstance(this, exp.TsOrDsToTime):
                func_name = "FORMAT_TIME"
            else:
                func_name = "FORMAT_DATE"

            # Unwrap TsOrDsTo* so the inner value is passed to FORMAT_*
            time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            """Render `=`; a comparison against NULL folds to NULL (except in UPDATE SET)."""
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            """Render AT TIME ZONE via TIMESTAMP(DATETIME(x, zone)) unless inside CAST(.. AS STRING)."""
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Render TRY_CAST as BigQuery's SAFE_CAST."""
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render subscripts, restoring OFFSET/ORDINAL/SAFE_* wrappers and struct field access."""
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    # Local import to avoid a circular dependency at module load time
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            """Render the UNNEST operand of `IN UNNEST(...)` without extra parentheses."""
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            """Render time-travel clauses, mapping TIMESTAMP versioning to SYSTEM_TIME."""
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)

        def contains_sql(self, expression: exp.Contains) -> str:
            """Render CONTAINS_SUBSTR; LOWER() on both sides is redundant and dropped."""
            this = expression.this
            expr = expression.expression

            # CONTAINS_SUBSTR performs a case-insensitive match, so matching
            # LOWER wrappers on both operands can be stripped
            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope"))

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            """Render CAST, preserving inline typed array literal syntax like ARRAY<INT64>[1, 2]."""
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Whether byte string literals (ex: BigQuery's b'...') are typed as BYTES/BINARY
Whether to annotate all scopes during optimization. Used by BigQuery for UNNEST support.
Whether projection alias names can shadow table/source names in GROUP BY and HAVING clauses.
In BigQuery, when a projection alias has the same name as a source table, the alias takes precedence in GROUP BY and HAVING clauses, and the table becomes inaccessible by that name.
For example, in BigQuery: SELECT id, ARRAY_AGG(col) AS custom_fields FROM custom_fields GROUP BY id HAVING id >= 1
The "custom_fields" source is shadowed by the projection alias, so we cannot qualify "id" with "custom_fields" in GROUP BY/HAVING.
Whether table names can be referenced as columns (treated as structs).
BigQuery allows tables to be referenced as columns in queries, automatically treating them as struct values containing all the table's columns.
For example, in BigQuery: SELECT t FROM my_table AS t -- Returns entire row as a struct
Whether the dialect supports expanding struct fields using star notation (e.g., struct_col.*).
BigQuery allows struct fields to be expanded with the star operator:
SELECT t.struct_col.* FROM table t
RisingWave also allows struct field expansion with the star operator using parentheses:
SELECT (t.struct_col).* FROM table t
This expands to all fields within the struct.
Whether pseudocolumns should be excluded from star expansion (SELECT *).
Pseudocolumns are special dialect-specific columns (e.g., Oracle's ROWNUM, ROWID, LEVEL, or BigQuery's _PARTITIONTIME, _PARTITIONDATE) that are implicitly available but not part of the table schema. When this is True, SELECT * will not include these pseudocolumns; they must be explicitly selected.
Whether query results are typed as structs in metadata for type inference.
In BigQuery, subqueries store their column types as a STRUCT in metadata,
enabling special type inference for ARRAY(SELECT ...) expressions:
ARRAY(SELECT x, y FROM t) → ARRAY&lt;STRUCT&lt;x_type, y_type&gt;&gt;. For single-column subqueries, BigQuery unwraps the struct:
ARRAY(SELECT x FROM t) → ARRAY&lt;x_type&gt;. This is metadata-only for type inference.
Whether JSON_EXTRACT_SCALAR returns null if a non-scalar value is selected.
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
The default type of NULL for producing the correct projection type.
For example, in BigQuery the default type of the NULL value is INT64.
Whether to prioritize non-literal types over literals during type annotation.
Whether the table alias comes after version (timestamp or iceberg snapshot).
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
def normalize_identifier(self, expression: E) -> E:
    """Lowercase identifiers that BigQuery resolves case-insensitively.

    BigQuery treats CTE names as case-insensitive, while UDF and table names
    are case-sensitive by default, so those are left untouched. Everything
    else is normalized by the base class when the dialect's strategy is not
    CASE_INSENSITIVE.
    """
    if not (
        isinstance(expression, exp.Identifier)
        and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
    ):
        return super().normalize_identifier(expression)

    # Skip over any Dot wrappers so we inspect the node that actually owns
    # this identifier (e.g. a Table or UserDefinedFunction).
    ancestor = expression.parent
    while isinstance(ancestor, exp.Dot):
        ancestor = ancestor.parent

    # Heuristic: qualified tables (those with a db part) are assumed to be real
    # table references and therefore case-sensitive. This should generally be
    # correct, because tables in BigQuery must be qualified with at least a
    # dataset, unless @@dataset_id is set.
    keep_case = bool(
        expression.meta.get("is_table")
        or isinstance(ancestor, exp.UserDefinedFunction)
        or (
            isinstance(ancestor, exp.Table)
            and ancestor.db
            and (ancestor.meta.get("quoted_table") or not ancestor.meta.get("maybe_column"))
        )
    )

    if not keep_case:
        expression.set("this", expression.this.lower())

    return t.cast(E, expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    # Tokens accepted as part of an unquoted JSON path segment. TokenType.DASH is
    # included on top of the base VAR token, presumably because BigQuery allows
    # hyphenated keys in JSON paths (cf. SAFE_JSON_PATH_KEY_RE in the generator,
    # which also admits `-`) — TODO confirm against BigQuery's JSONPath docs.
    VAR_TOKENS = {
        TokenType.DASH,
        TokenType.VAR,
    }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens
class Tokenizer(tokens.Tokenizer):
    """Tokenizer configured for BigQuery's lexical rules (GoogleSQL)."""

    # Both single- and triple-quoted strings are valid; identifiers use backticks.
    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    # b'...' / B"..." etc. — byte string literals for every quote style above.
    BYTE_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
    ]

    # r'...' / R"..." etc. — raw string literals for every quote style above.
    RAW_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
    ]

    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ANY TYPE": TokenType.VARIANT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BYTEINT": TokenType.INT,
        "BYTES": TokenType.BINARY,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        # BigQuery's DATETIME has no timezone, so it maps to sqlglot's naive
        # TIMESTAMP; BigQuery's TIMESTAMP is timezone-aware, hence TIMESTAMPTZ.
        # (The generator's TYPE_MAPPING performs the inverse mapping.)
        "DATETIME": TokenType.TIMESTAMP,
        "DECLARE": TokenType.DECLARE,
        "ELSEIF": TokenType.COMMAND,
        "EXCEPTION": TokenType.COMMAND,
        "EXPORT": TokenType.EXPORT,
        "FLOAT64": TokenType.DOUBLE,
        "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "LOOP": TokenType.COMMAND,
        "MODEL": TokenType.MODEL,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "RECORD": TokenType.STRUCT,
        "REPEAT": TokenType.COMMAND,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
        "WHILE": TokenType.COMMAND,
    }
    # DIV and /*+ (hint start) are not BigQuery syntax; VALUES is dropped so it
    # can be parsed as a regular identifier/function name.
    KEYWORDS.pop("DIV")
    KEYWORDS.pop("VALUES")
    KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- tokenize
- sql
- size
- tokens
class Parser(parser.Parser):
    """Parser for BigQuery-specific syntax.

    Notable behaviors handled here: dashed table names (``proj-1.db.tbl``),
    ``INFORMATION_SCHEMA`` view references, ``JSON_OBJECT``'s array-pair
    signature, ``OFFSET``/``ORDINAL`` array subscripts, and BigQuery ML
    function calls (``ML.PREDICT``, ``ML.FORECAST`` etc.).
    """

    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True
    JOINS_HAVE_EQUAL_PRECEDENCE = True

    # BigQuery does not allow ASC/DESC to be used as an identifier, allows GRANT as an identifier
    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.GRANT,
    } - {TokenType.ASC, TokenType.DESC}

    ALIAS_TOKENS = {
        *parser.Parser.ALIAS_TOKENS,
        TokenType.GRANT,
    } - {TokenType.ASC, TokenType.DESC}

    TABLE_ALIAS_TOKENS = {
        *parser.Parser.TABLE_ALIAS_TOKENS,
        TokenType.GRANT,
    } - {TokenType.ASC, TokenType.DESC}

    COMMENT_TABLE_ALIAS_TOKENS = {
        *parser.Parser.COMMENT_TABLE_ALIAS_TOKENS,
        TokenType.GRANT,
    } - {TokenType.ASC, TokenType.DESC}

    UPDATE_ALIAS_TOKENS = {
        *parser.Parser.UPDATE_ALIAS_TOKENS,
        TokenType.GRANT,
    } - {TokenType.ASC, TokenType.DESC}

    # Maps BigQuery function names to builders for sqlglot's canonical AST nodes.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
        "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
        "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
        "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
        "BIT_COUNT": exp.BitwiseCount.from_arg_list,
        "BOOL": exp.JSONBool.from_arg_list,
        "CONTAINS_SUBSTR": _build_contains_substring,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_DIFF": build_date_diff,
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=seq_get(args, 1),
            this=seq_get(args, 0),
            zone=seq_get(args, 2),
        ),
        "DATETIME": _build_datetime,
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "EDIT_DISTANCE": _build_levenshtein,
        "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
        "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
        "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list,
        "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
        "JSON_STRIP_NULLS": _build_json_strip_nulls,
        "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
        "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
        # BigQuery's LENGTH counts bytes for BYTES input, hence binary=True.
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA1": exp.SHA1Digest.from_arg_list,
        "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
            this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
        ),
        "OCTET_LENGTH": exp.ByteLength.from_arg_list,
        "TO_HEX": _build_to_hex,
        # PARSE_* take (format, value); sqlglot's canonical order is (value, format).
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(
            exp.RegexpExtractAll, default_group=exp.Literal.number(0)
        ),
        "SHA256": lambda args: exp.SHA2Digest(
            this=seq_get(args, 0), length=exp.Literal.number(256)
        ),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "STRPOS": exp.StrPosition.from_arg_list,
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON": lambda args: exp.JSONFormat(
            this=seq_get(args, 0), options=seq_get(args, 1), to_json=True
        ),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
        "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
    }
    # Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function
    FUNCTIONS.pop("SEARCH")

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(
            exp.Array,
            expressions=[self._parse_statement()],
            struct_name_inheritance=True,
        ),
        "JSON_ARRAY": lambda self: self.expression(
            exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
        ),
        "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
        "PREDICT": lambda self: self._parse_ml(exp.Predict),
        "TRANSLATE": lambda self: self._parse_translate(),
        "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding),
        "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml(
            exp.GenerateEmbedding, is_text=True
        ),
        "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
        "FORECAST": lambda self: self._parse_ml(exp.MLForecast),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    # Tokens that terminate a dashed table-name part (see _parse_table_part).
    DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
        TokenType.EXPORT: lambda self: self._parse_export_data(),
        TokenType.DECLARE: lambda self: self._parse_declare(),
    }

    # Subscript operators -> (index base, SAFE_ semantics), e.g. arr[OFFSET(0)]
    # is 0-based and raises, while arr[SAFE_ORDINAL(1)] is 1-based and yields NULL.
    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]:
        """Parse FOR ... IN ... DO, falling back to a raw Command if the body
        starts with an unparseable (command) token."""
        index = self._index
        this = self._parse_range()
        self._match_text_seq("DO")
        if self._match(TokenType.COMMAND):
            self._retreat(index)
            return self._parse_as_command(self._prev)
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single table-name part, gluing dashed names (my-project)
        and number-prefixed parts back into one identifier."""
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            # Consume DASH-joined fragments until a token that can't continue the name.
            while self._match(TokenType.DASH, advance=False) and self._next:
                start = self._curr
                while self._is_connected() and not self._match_set(
                    self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                ):
                    self._advance()

                if start == self._curr:
                    break

                table_name += self._find_sql(start, self._prev)

            this = exp.Identifier(
                this=table_name, quoted=this.args.get("quoted")
            ).update_positions(this)
        elif isinstance(this, exp.Literal):
            # A part that begins with a number is tokenized as a literal (e.g. `123abc`).
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dashed / quoted / INFORMATION_SCHEMA) table reference."""
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                previous_db = table.args["db"]
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set(
                        "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                    )
                    table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
            else:
                previous_this = table.this
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set(
                        "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                    )
                    table.set(
                        "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                    )

        # A quoted multi-part name like `proj.db.tbl` arrives as one identifier
        # containing dots; split it into catalog/db/this here.
        if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
            alias = table.this
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            for part in (catalog, db, this):
                if part:
                    part.update_positions(table.this)

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(
                this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
            )
            table.meta["quoted_table"] = True
        else:
            alias = None

        # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
        # dataset, so if the project identifier is omitted we need to fix the ast so that
        # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
        # Otherwise, we wouldn't correctly qualify a `Table` node that references these
        # views, because it would seem like the "catalog" part is set, when it'd actually
        # be the region/dataset. Merging the two identifiers into a single one is done to
        # avoid producing a 4-part Table reference, which would cause issues in the schema
        # module, when there are 3-part table names mixed with information schema views.
        #
        # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
        table_parts = table.parts
        if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
            # We need to alias the table here to avoid breaking existing qualified columns.
            # This is expected to be safe, because if there's an actual alias coming up in
            # the token stream, it will overwrite this one. If there isn't one, we are only
            # exposing the name that can be used to reference the view explicitly (a no-op).
            exp.alias_(
                table,
                t.cast(exp.Identifier, alias or table_parts[-1]),
                table=True,
                copy=False,
            )

            info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
            new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                line=table_parts[-2].meta.get("line"),
                col=table_parts[-1].meta.get("col"),
                start=table_parts[-2].meta.get("start"),
                end=table_parts[-1].meta.get("end"),
            )
            table.set("this", new_this)
            table.set("db", seq_get(table_parts, -3))
            table.set("catalog", seq_get(table_parts, -4))

        return table

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Split quoted column references containing dots (`t.a.b`) into parts."""
        column = super()._parse_column()
        if isinstance(column, exp.Column):
            parts = column.parts
            if any("." in p.name for p in parts):
                catalog, db, table, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                column.meta["quoted_column"] = True

        return column

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        """Parse array subscripts, lowering OFFSET/ORDINAL/SAFE_* wrappers into
        the bracket's `offset`/`safe` args (see BRACKET_OFFSETS)."""
        bracket = super()._parse_bracket(this)

        if isinstance(bracket, exp.Array):
            bracket.set("struct_name_inheritance", True)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                # Unwrap e.g. OFFSET(0) -> 0, the semantics now live on the Bracket node.
                expression.replace(expression.expressions[0])

        return bracket

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        unnest = super()._parse_unnest(with_alias=with_alias)

        if not unnest:
            return None

        unnest_expr = seq_get(unnest.expressions, 0)
        if unnest_expr:
            from sqlglot.optimizer.annotate_types import annotate_types

            unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

            # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
            # in contrast to other dialects such as DuckDB which flattens only the array by default
            if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                array_elem.is_type(exp.DataType.Type.STRUCT)
                for array_elem in unnest_expr._type.expressions
            ):
                unnest.set("explode_array", True)

        return unnest

    def _parse_make_interval(self) -> exp.MakeInterval:
        """Parse MAKE_INTERVAL(year, month, ..., second) with optional kwargs."""
        expr = exp.MakeInterval()

        for arg_key in MAKE_INTERVAL_KWARGS:
            value = self._parse_lambda()

            if not value:
                break

            # Non-named arguments are filled sequentially, (optionally) followed by named arguments
            # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
            if isinstance(value, exp.Kwarg):
                arg_key = value.this.name

            expr.set(arg_key, value)

            self._match(TokenType.COMMA)

        return expr

    def _parse_ml(self, expr_type: t.Type[E], **kwargs) -> E:
        """Parse an ML.* call of the form (MODEL m, TABLE t | STRUCT, params)."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one
        expression = (
            self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None
        )

        self._match(TokenType.COMMA)

        return self.expression(
            expr_type,
            this=this,
            expression=expression,
            params_struct=self._parse_bitwise(),
            **kwargs,
        )

    def _parse_translate(self) -> exp.Translate | exp.MLTranslate:
        # Check if this is ML.TRANSLATE by looking at previous tokens
        token = seq_get(self._tokens, self._index - 4)
        if token and token.text.upper() == "ML":
            return self._parse_ml(exp.MLTranslate)

        return exp.Translate.from_arg_list(self._parse_function_args())

    def _parse_features_at_time(self) -> exp.FeaturesAtTime:
        """Parse ML.FEATURES_AT_TIME(TABLE t, kwarg => value, ...)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        expr = self.expression(exp.FeaturesAtTime, this=this)

        while self._match(TokenType.COMMA):
            arg = self._parse_lambda()

            # Get the LHS of the Kwarg and set the arg to that value, e.g
            # "num_rows => 1" sets the expr's `num_rows` arg
            if arg:
                expr.set(arg.this.name, arg)

        return expr

    def _parse_vector_search(self) -> exp.VectorSearch:
        """Parse VECTOR_SEARCH(TABLE base, col, TABLE query, [args...])."""
        self._match(TokenType.TABLE)
        base_table = self._parse_table()

        self._match(TokenType.COMMA)

        column_to_search = self._parse_bitwise()
        self._match(TokenType.COMMA)

        self._match(TokenType.TABLE)
        query_table = self._parse_table()

        expr = self.expression(
            exp.VectorSearch,
            this=base_table,
            column_to_search=column_to_search,
            query_table=query_table,
        )

        while self._match(TokenType.COMMA):
            # query_column_to_search can be named argument or positional
            if self._match(TokenType.STRING, advance=False):
                query_column = self._parse_string()
                expr.set("query_column_to_search", query_column)
            else:
                arg = self._parse_lambda()
                if arg:
                    expr.set(arg.this.name, arg)

        return expr

    def _parse_export_data(self) -> exp.Export:
        """Parse EXPORT DATA [WITH CONNECTION conn] OPTIONS(...) AS select."""
        self._match_text_seq("DATA")

        return self.expression(
            exp.Export,
            connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
            options=self._parse_properties(),
            this=self._match_text_seq("AS") and self._parse_select(),
        )

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Reparse NET.* and SAFE.* prefixed calls as known functions rather
        than anonymous ones."""
        func_index = self._index + 1
        this = super()._parse_column_ops(this)

        if isinstance(this, exp.Dot) and isinstance(this.expression, exp.Func):
            prefix = this.this.name.upper()

            func: t.Optional[t.Type[exp.Func]] = None
            if prefix == "NET":
                func = exp.NetFunc
            elif prefix == "SAFE":
                func = exp.SafeFunc

            if func:
                # Retreat to try and parse a known function instead of an anonymous one,
                # which is parsed by the base column ops parser due to anonymous_func=true
                self._retreat(func_index)
                this = func(this=self._parse_function(any_token=True))

        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- raise_error
- validate_expression
- reset
- errors
- error_level
- error_message_context
- max_errors
- dialect
- sql
- parse
- parse_into
- check_errors
- expression
- parse_set_operation
- build_cast
1092 class Generator(generator.Generator): 1093 INTERVAL_ALLOWS_PLURAL_FORM = False 1094 JOIN_HINTS = False 1095 QUERY_HINTS = False 1096 TABLE_HINTS = False 1097 LIMIT_FETCH = "LIMIT" 1098 RENAME_TABLE_WITH_DB = False 1099 NVL2_SUPPORTED = False 1100 UNNEST_WITH_ORDINALITY = False 1101 COLLATE_IS_FUNC = True 1102 LIMIT_ONLY_LITERALS = True 1103 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1104 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1105 JSON_KEY_VALUE_PAIR_SEP = "," 1106 NULL_ORDERING_SUPPORTED = False 1107 IGNORE_NULLS_IN_FUNC = True 1108 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1109 CAN_IMPLEMENT_ARRAY_ANY = True 1110 SUPPORTS_TO_NUMBER = False 1111 NAMED_PLACEHOLDER_TOKEN = "@" 1112 HEX_FUNC = "TO_HEX" 1113 WITH_PROPERTIES_PREFIX = "OPTIONS" 1114 SUPPORTS_EXPLODING_PROJECTIONS = False 1115 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1116 SUPPORTS_UNIX_SECONDS = True 1117 DECLARE_DEFAULT_ASSIGNMENT = "DEFAULT" 1118 1119 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1120 1121 TS_OR_DS_TYPES = ( 1122 exp.TsOrDsToDatetime, 1123 exp.TsOrDsToTimestamp, 1124 exp.TsOrDsToTime, 1125 exp.TsOrDsToDate, 1126 ) 1127 1128 TRANSFORMS = { 1129 **generator.Generator.TRANSFORMS, 1130 exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"), 1131 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1132 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1133 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1134 exp.Array: inline_array_unless_query, 1135 exp.ArrayContains: _array_contains_sql, 1136 exp.ArrayFilter: filter_array_using_unnest, 1137 exp.ArrayRemove: filter_array_using_unnest, 1138 exp.BitwiseAndAgg: rename_func("BIT_AND"), 1139 exp.BitwiseOrAgg: rename_func("BIT_OR"), 1140 exp.BitwiseXorAgg: rename_func("BIT_XOR"), 1141 exp.BitwiseCount: rename_func("BIT_COUNT"), 1142 exp.ByteLength: rename_func("BYTE_LENGTH"), 1143 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1144 exp.CollateProperty: lambda self, e: ( 1145 f"DEFAULT COLLATE 
{self.sql(e, 'this')}" 1146 if e.args.get("default") 1147 else f"COLLATE {self.sql(e, 'this')}" 1148 ), 1149 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1150 exp.CountIf: rename_func("COUNTIF"), 1151 exp.Create: _create_sql, 1152 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1153 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1154 exp.DateDiff: lambda self, e: self.func( 1155 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1156 ), 1157 exp.DateFromParts: rename_func("DATE"), 1158 exp.DateStrToDate: datestrtodate_sql, 1159 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1160 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1161 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1162 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1163 exp.FromTimeZone: lambda self, e: self.func( 1164 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1165 ), 1166 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1167 exp.GroupConcat: lambda self, e: groupconcat_sql( 1168 self, e, func_name="STRING_AGG", within_group=False, sep=None 1169 ), 1170 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1171 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1172 exp.If: if_sql(false_value="NULL"), 1173 exp.ILike: no_ilike_sql, 1174 exp.IntDiv: rename_func("DIV"), 1175 exp.Int64: rename_func("INT64"), 1176 exp.JSONBool: rename_func("BOOL"), 1177 exp.JSONExtract: _json_extract_sql, 1178 exp.JSONExtractArray: _json_extract_sql, 1179 exp.JSONExtractScalar: _json_extract_sql, 1180 exp.JSONFormat: lambda self, e: self.func( 1181 "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING", 1182 e.this, 1183 e.args.get("options"), 1184 ), 1185 exp.JSONKeysAtDepth: rename_func("JSON_KEYS"), 1186 exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"), 1187 exp.Levenshtein: _levenshtein_sql, 1188 exp.Max: max_or_greatest, 1189 exp.MD5: lambda self, e: 
self.func("TO_HEX", self.func("MD5", e.this)), 1190 exp.MD5Digest: rename_func("MD5"), 1191 exp.Min: min_or_least, 1192 exp.Normalize: lambda self, e: self.func( 1193 "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE", 1194 e.this, 1195 e.args.get("form"), 1196 ), 1197 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1198 exp.RegexpExtract: lambda self, e: self.func( 1199 "REGEXP_EXTRACT", 1200 e.this, 1201 e.expression, 1202 e.args.get("position"), 1203 e.args.get("occurrence"), 1204 ), 1205 exp.RegexpExtractAll: lambda self, e: self.func( 1206 "REGEXP_EXTRACT_ALL", e.this, e.expression 1207 ), 1208 exp.RegexpReplace: regexp_replace_sql, 1209 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1210 exp.ReturnsProperty: _returnsproperty_sql, 1211 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1212 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1213 exp.ParseDatetime: lambda self, e: self.func( 1214 "PARSE_DATETIME", self.format_time(e), e.this 1215 ), 1216 exp.Select: transforms.preprocess( 1217 [ 1218 transforms.explode_projection_to_unnest(), 1219 transforms.unqualify_unnest, 1220 transforms.eliminate_distinct_on, 1221 _alias_ordered_group, 1222 transforms.eliminate_semi_and_anti_joins, 1223 ] 1224 ), 1225 exp.SHA: rename_func("SHA1"), 1226 exp.SHA2: sha256_sql, 1227 exp.SHA1Digest: rename_func("SHA1"), 1228 exp.SHA2Digest: sha2_digest_sql, 1229 exp.StabilityProperty: lambda self, e: ( 1230 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1231 ), 1232 exp.String: rename_func("STRING"), 1233 exp.StrPosition: lambda self, e: ( 1234 strposition_sql( 1235 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1236 ) 1237 ), 1238 exp.StrToDate: _str_to_datetime_sql, 1239 exp.StrToTime: _str_to_datetime_sql, 1240 exp.SessionUser: lambda *_: "SESSION_USER()", 1241 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1242 exp.TimeFromParts: 
rename_func("TIME"), 1243 exp.TimestampFromParts: rename_func("DATETIME"), 1244 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1245 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1246 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1247 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1248 exp.TimeStrToTime: timestrtotime_sql, 1249 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1250 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1251 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1252 exp.TsOrDsToTime: rename_func("TIME"), 1253 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1254 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1255 exp.Unhex: rename_func("FROM_HEX"), 1256 exp.UnixDate: rename_func("UNIX_DATE"), 1257 exp.UnixToTime: _unix_to_time_sql, 1258 exp.Uuid: lambda *_: "GENERATE_UUID()", 1259 exp.Values: _derived_table_values_to_unnest, 1260 exp.VariancePop: rename_func("VAR_POP"), 1261 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1262 } 1263 1264 SUPPORTED_JSON_PATH_PARTS = { 1265 exp.JSONPathKey, 1266 exp.JSONPathRoot, 1267 exp.JSONPathSubscript, 1268 } 1269 1270 TYPE_MAPPING = { 1271 **generator.Generator.TYPE_MAPPING, 1272 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1273 exp.DataType.Type.BIGINT: "INT64", 1274 exp.DataType.Type.BINARY: "BYTES", 1275 exp.DataType.Type.BLOB: "BYTES", 1276 exp.DataType.Type.BOOLEAN: "BOOL", 1277 exp.DataType.Type.CHAR: "STRING", 1278 exp.DataType.Type.DECIMAL: "NUMERIC", 1279 exp.DataType.Type.DOUBLE: "FLOAT64", 1280 exp.DataType.Type.FLOAT: "FLOAT64", 1281 exp.DataType.Type.INT: "INT64", 1282 exp.DataType.Type.NCHAR: "STRING", 1283 exp.DataType.Type.NVARCHAR: "STRING", 1284 exp.DataType.Type.SMALLINT: "INT64", 1285 exp.DataType.Type.TEXT: "STRING", 1286 exp.DataType.Type.TIMESTAMP: "DATETIME", 1287 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1288 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1289 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1290 exp.DataType.Type.TINYINT: "INT64", 1291 exp.DataType.Type.ROWVERSION: 
"BYTES", 1292 exp.DataType.Type.UUID: "STRING", 1293 exp.DataType.Type.VARBINARY: "BYTES", 1294 exp.DataType.Type.VARCHAR: "STRING", 1295 exp.DataType.Type.VARIANT: "ANY TYPE", 1296 } 1297 1298 PROPERTIES_LOCATION = { 1299 **generator.Generator.PROPERTIES_LOCATION, 1300 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1301 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1302 } 1303 1304 # WINDOW comes after QUALIFY 1305 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1306 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1307 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1308 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1309 } 1310 1311 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1312 RESERVED_KEYWORDS = { 1313 "all", 1314 "and", 1315 "any", 1316 "array", 1317 "as", 1318 "asc", 1319 "assert_rows_modified", 1320 "at", 1321 "between", 1322 "by", 1323 "case", 1324 "cast", 1325 "collate", 1326 "contains", 1327 "create", 1328 "cross", 1329 "cube", 1330 "current", 1331 "default", 1332 "define", 1333 "desc", 1334 "distinct", 1335 "else", 1336 "end", 1337 "enum", 1338 "escape", 1339 "except", 1340 "exclude", 1341 "exists", 1342 "extract", 1343 "false", 1344 "fetch", 1345 "following", 1346 "for", 1347 "from", 1348 "full", 1349 "group", 1350 "grouping", 1351 "groups", 1352 "hash", 1353 "having", 1354 "if", 1355 "ignore", 1356 "in", 1357 "inner", 1358 "intersect", 1359 "interval", 1360 "into", 1361 "is", 1362 "join", 1363 "lateral", 1364 "left", 1365 "like", 1366 "limit", 1367 "lookup", 1368 "merge", 1369 "natural", 1370 "new", 1371 "no", 1372 "not", 1373 "null", 1374 "nulls", 1375 "of", 1376 "on", 1377 "or", 1378 "order", 1379 "outer", 1380 "over", 1381 "partition", 1382 "preceding", 1383 "proto", 1384 "qualify", 1385 "range", 1386 "recursive", 1387 "respect", 1388 "right", 1389 "rollup", 1390 "rows", 1391 
"select", 1392 "set", 1393 "some", 1394 "struct", 1395 "tablesample", 1396 "then", 1397 "to", 1398 "treat", 1399 "true", 1400 "unbounded", 1401 "union", 1402 "unnest", 1403 "using", 1404 "when", 1405 "where", 1406 "window", 1407 "with", 1408 "within", 1409 } 1410 1411 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1412 unit = expression.unit 1413 unit_sql = unit.name if unit.is_string else self.sql(unit) 1414 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1415 1416 def mod_sql(self, expression: exp.Mod) -> str: 1417 this = expression.this 1418 expr = expression.expression 1419 return self.func( 1420 "MOD", 1421 this.unnest() if isinstance(this, exp.Paren) else this, 1422 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1423 ) 1424 1425 def column_parts(self, expression: exp.Column) -> str: 1426 if expression.meta.get("quoted_column"): 1427 # If a column reference is of the form `dataset.table`.name, we need 1428 # to preserve the quoted table path, otherwise the reference breaks 1429 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1430 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1431 return f"{table_path}.{self.sql(expression, 'this')}" 1432 1433 return super().column_parts(expression) 1434 1435 def table_parts(self, expression: exp.Table) -> str: 1436 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1437 # we need to make sure the correct quoting is used in each case. 
1438 # 1439 # For example, if there is a CTE x that clashes with a schema name, then the former will 1440 # return the table y in that schema, whereas the latter will return the CTE's y column: 1441 # 1442 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1443 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1444 if expression.meta.get("quoted_table"): 1445 table_parts = ".".join(p.name for p in expression.parts) 1446 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1447 1448 return super().table_parts(expression) 1449 1450 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1451 this = expression.this 1452 if isinstance(this, exp.TsOrDsToDatetime): 1453 func_name = "FORMAT_DATETIME" 1454 elif isinstance(this, exp.TsOrDsToTimestamp): 1455 func_name = "FORMAT_TIMESTAMP" 1456 elif isinstance(this, exp.TsOrDsToTime): 1457 func_name = "FORMAT_TIME" 1458 else: 1459 func_name = "FORMAT_DATE" 1460 1461 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1462 return self.func( 1463 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1464 ) 1465 1466 def eq_sql(self, expression: exp.EQ) -> str: 1467 # Operands of = cannot be NULL in BigQuery 1468 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1469 if not isinstance(expression.parent, exp.Update): 1470 return "NULL" 1471 1472 return self.binary(expression, "=") 1473 1474 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1475 parent = expression.parent 1476 1477 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1478 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1479 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1480 return self.func( 1481 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1482 ) 1483 1484 return super().attimezone_sql(expression) 1485 1486 def trycast_sql(self, expression: exp.TryCast) -> str: 1487 return self.cast_sql(expression, safe_prefix="SAFE_") 1488 1489 def bracket_sql(self, expression: exp.Bracket) -> str: 1490 this = expression.this 1491 expressions = expression.expressions 1492 1493 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1494 arg = expressions[0] 1495 if arg.type is None: 1496 from sqlglot.optimizer.annotate_types import annotate_types 1497 1498 arg = annotate_types(arg, dialect=self.dialect) 1499 1500 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1501 # BQ doesn't support bracket syntax with string values for structs 1502 return f"{self.sql(this)}.{arg.name}" 1503 1504 expressions_sql = self.expressions(expression, flat=True) 1505 offset = expression.args.get("offset") 1506 1507 if offset == 0: 1508 expressions_sql = f"OFFSET({expressions_sql})" 1509 elif offset == 1: 1510 expressions_sql = f"ORDINAL({expressions_sql})" 1511 elif offset is not None: 1512 self.unsupported(f"Unsupported array offset: {offset}") 1513 1514 if expression.args.get("safe"): 1515 expressions_sql = f"SAFE_{expressions_sql}" 1516 1517 return f"{self.sql(this)}[{expressions_sql}]" 1518 1519 def in_unnest_op(self, expression: exp.Unnest) -> str: 1520 return self.sql(expression) 1521 1522 def version_sql(self, expression: exp.Version) -> str: 1523 if expression.name == "TIMESTAMP": 1524 expression.set("this", "SYSTEM_TIME") 1525 return super().version_sql(expression) 1526 1527 def contains_sql(self, expression: exp.Contains) -> str: 1528 this = expression.this 1529 expr = expression.expression 1530 1531 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1532 this = this.this 1533 expr = expr.this 1534 1535 
return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1536 1537 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1538 this = expression.this 1539 1540 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1541 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1542 # because they aren't literals and so the above syntax is invalid BigQuery. 1543 if isinstance(this, exp.Array): 1544 elem = seq_get(this.expressions, 0) 1545 if not (elem and elem.find(exp.Query)): 1546 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1547 1548 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case-insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def column_parts(self, expression: exp.Column) -> str:
    """Render the dotted parts of a column reference.

    When the column was parsed from a quoted multi-part table path
    (e.g. `dataset.table`.name), that path is re-emitted as a single
    quoted identifier, since splitting it into separate identifiers
    would break the reference.
    """
    if not expression.meta.get("quoted_column"):
        return super().column_parts(expression)

    # Re-assemble everything except the final (column) part into one
    # quoted identifier, then append the column name itself.
    path = ".".join(part.name for part in expression.parts[:-1])
    quoted_path = self.sql(exp.Identifier(this=path, quoted=True))
    return f"{quoted_path}.{self.sql(expression, 'this')}"
def table_parts(self, expression: exp.Table) -> str:
    """Render the dotted parts of a table reference.

    Depending on the context, `x.y` may not resolve to the same data
    source as `x`.`y`, so the original quoting must be preserved. For
    example, if a CTE x clashes with a schema name, the former returns
    the table y in that schema while the latter returns the CTE's y column:

    - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
    - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    """
    if not expression.meta.get("quoted_table"):
        return super().table_parts(expression)

    joined = ".".join(part.name for part in expression.parts)
    return self.sql(exp.Identifier(this=joined, quoted=True))
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Generate the appropriate FORMAT_* call for a TimeToStr expression.

    The function name is chosen from the inner TS_OR_DS conversion
    wrapping the operand, falling back to FORMAT_DATE when none matches.
    """
    inner = expression.this

    func_name = "FORMAT_DATE"
    for klass, candidate in (
        (exp.TsOrDsToDatetime, "FORMAT_DATETIME"),
        (exp.TsOrDsToTimestamp, "FORMAT_TIMESTAMP"),
        (exp.TsOrDsToTime, "FORMAT_TIME"),
    ):
        if isinstance(inner, klass):
            func_name = candidate
            break

    # Unwrap a TS_OR_DS conversion so its operand is formatted directly.
    source = inner if isinstance(inner, self.TS_OR_DS_TYPES) else expression
    return self.func(
        func_name, self.format_time(expression), source.this, expression.args.get("zone")
    )
def eq_sql(self, expression: exp.EQ) -> str:
    """Render `=`; a comparison with a NULL operand cannot hold in BigQuery."""
    has_null_operand = isinstance(expression.left, exp.Null) or isinstance(
        expression.right, exp.Null
    )

    # UPDATE ... SET col = NULL is an assignment, not a comparison,
    # so it must keep its form.
    if has_null_operand and not isinstance(expression.parent, exp.Update):
        return "NULL"

    return self.binary(expression, "=")
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    """Render AT TIME ZONE via nested TIMESTAMP(DATETIME(...)) calls.

    BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    Only the TIMESTAMP variant should use the conversion below; the STRING
    (text) cast keeps the default rendering.
    """
    parent = expression.parent
    casting_to_text = isinstance(parent, exp.Cast) and parent.to.is_type("text")

    if casting_to_text:
        return super().attimezone_sql(expression)

    return self.func(
        "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
    )
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render subscript access, BigQuery style.

    Struct access keyed by a string becomes dot notation (BQ rejects
    bracket syntax with string keys); array indices are wrapped in
    OFFSET/ORDINAL depending on the offset base, with an optional
    SAFE_ prefix.
    """
    this = expression.this
    keys = expression.expressions

    if this and this.is_type(exp.DataType.Type.STRUCT) and len(keys) == 1:
        key = keys[0]
        if key.type is None:
            # Annotate lazily: the key's type is only needed here.
            from sqlglot.optimizer.annotate_types import annotate_types

            key = annotate_types(key, dialect=self.dialect)

        if key.type and key.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{key.name}"

    inner_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        inner_sql = f"OFFSET({inner_sql})"
    elif offset == 1:
        inner_sql = f"ORDINAL({inner_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        inner_sql = f"SAFE_{inner_sql}"

    return f"{self.sql(this)}[{inner_sql}]"
def contains_sql(self, expression: exp.Contains) -> str:
    """Render Contains as a CONTAINS_SUBSTR call.

    When both operands are wrapped in LOWER(), the wrappers are stripped —
    CONTAINS_SUBSTR performs its own case normalization, so they are
    redundant (see BigQuery docs).
    """
    haystack = expression.this
    needle = expression.expression

    if isinstance(haystack, exp.Lower) and isinstance(needle, exp.Lower):
        haystack = haystack.this
        needle = needle.this

    return self.func("CONTAINS_SUBSTR", haystack, needle, expression.args.get("json_scope"))
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render CAST, special-casing inline type-annotated array literals.

    Literals like ARRAY<INT64>[1, 2, 3] are emitted as-is so they
    round-trip unaffected. ARRAY(SELECT ...) expressions are excluded,
    because they aren't literals and that syntax is invalid BigQuery.
    """
    inner = expression.this

    if isinstance(inner, exp.Array):
        first = seq_get(inner.expressions, 0)
        if not (first and first.find(exp.Query)):
            return f"{self.sql(expression, 'to')}{self.sql(inner)}"

    return super().cast_sql(expression, safe_prefix=safe_prefix)
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- INOUT_SEPARATOR
- DIRECTED_JOINS
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- UPDATE_STATEMENT_SUPPORTS_FROM
- STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- triggerproperties_sql
- triggerreferencing_sql
- triggerevent_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql
- block_sql
- storedprocedure_sql
- ifblock_sql
- whileblock_sql
- execute_sql
- executesql_sql