sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t


from sqlglot.optimizer.annotate_types import TypeAnnotator

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
    sha2_digest_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.typing.bigquery import EXPRESSION_METADATA

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")

MAKE_INTERVAL_KWARGS = ["year", "month", "day", "hour", "minute", "second"]


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _build_json_strip_nulls(args: t.List) -> exp.JSONStripNulls:
    expression = exp.JSONStripNulls(this=seq_get(args, 0))

    for arg in args[1:]:
        if isinstance(arg, exp.Kwarg):
            expression.set(arg.this.name.lower(), arg)
        else:
            expression.set("expression", arg)

    return expression


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def build_date_diff(args: t.List) -> exp.Expression:
    expr = exp.DateDiff(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        unit=seq_get(args, 2),
        date_part_boundary=True,
    )

    # Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantics in the AST and facilitate transpilation.
    # This is done after constructing exp.DateDiff, since the TimeUnit mixin also canonicalizes units in its constructor.
    unit = expr.args.get("unit")

    if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
        expr.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))

    return expr


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List, BigQuery], E]:
    def _builder(args: t.List, dialect: BigQuery) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
            **(
                {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
                if expr_type is exp.RegexpExtract
                else {}
            ),
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        formatted_time = build_formatted_time(exp.TimeToStr, "bigquery")(
            [expr_type(this=seq_get(args, 1)), seq_get(args, 0)]
        )
        formatted_time.set("zone", seq_get(args, 2))
        return formatted_time

    return _builder


def _build_contains_substring(args: t.List) -> exp.Contains:
    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr, json_scope=seq_get(args, 2))


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ONLY_GROUP_ALIAS_REF = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True
    BYTE_STRING_IS_BYTES_TYPE = True
    UUID_IS_STRING_TYPE = True
    ANNOTATE_ALL_SCOPES = True
    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
    TABLES_REFERENCEABLE_AS_COLUMNS = True
    SUPPORTS_STRUCT_STAR_EXPANSION = True
    EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
    QUERY_RESULTS_ARE_STRUCTS = True
    JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
    LEAST_GREATEST_IGNORES_NULLS = False
    DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
    PRIORITIZE_NON_LITERAL_TYPES = True

    # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%x": "%m/%d/%y",
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
        "%F": "%Y-%m-%d",
        "%T": "%H:%M:%S",
        "%c": "%a %b %e %H:%M:%S %Y",
    }

    INVERSE_TIME_MAPPING = {
        # Preserve %E6S instead of expanding it to %T.%f: the two are semantically different in
        # BigQuery. %E6S is a single atomic specifier for seconds with microseconds, whereas
        # %T.%f expands incorrectly and fails to parse.
        "%H:%M:%S.%f": "%H:%M:%E6S",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
    # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
    PSEUDOCOLUMNS = {
        "_PARTITIONTIME",
        "_PARTITIONDATE",
        "_TABLE_SUFFIX",
        "_FILE_NAME",
        "_DBT_MAX_PARTITION",
    }

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont
    COERCES_TO = {
        **TypeAnnotator.COERCES_TO,
        exp.DataType.Type.BIGDECIMAL: {exp.DataType.Type.DOUBLE},
    }
    COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
    COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
    COERCES_TO[exp.DataType.Type.VARCHAR] |= {
        exp.DataType.Type.DATE,
        exp.DataType.Type.DATETIME,
        exp.DataType.Type.TIME,
        exp.DataType.Type.TIMESTAMP,
        exp.DataType.Type.TIMESTAMPTZ,
    }

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        VAR_TOKENS = {
            TokenType.DASH,
            TokenType.VAR,
        }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "WHILE": TokenType.COMMAND,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        # BigQuery does not allow ASC/DESC to be used as an identifier, allows GRANT as an identifier
        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        ALIAS_TOKENS = {
            *parser.Parser.ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        TABLE_ALIAS_TOKENS = {
            *parser.Parser.TABLE_ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        COMMENT_TABLE_ALIAS_TOKENS = {
            *parser.Parser.COMMENT_TABLE_ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        UPDATE_ALIAS_TOKENS = {
            *parser.Parser.UPDATE_ALIAS_TOKENS,
            TokenType.GRANT,
        } - {TokenType.ASC, TokenType.DESC}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "BIT_COUNT": exp.BitwiseCount.from_arg_list,
            "BOOL": exp.JSONBool.from_arg_list,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_DIFF": build_date_diff,
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=seq_get(args, 1),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list,
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_STRIP_NULLS": _build_json_strip_nulls,
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA1": exp.SHA1Digest.from_arg_list,
            "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
                this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
            ),
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2Digest(
                this=seq_get(args, 0), length=exp.Literal.number(256)
            ),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), to_json=True
            ),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
        }
        # Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function
        FUNCTIONS.pop("SEARCH")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(
                exp.Array,
                expressions=[self._parse_statement()],
                struct_name_inheritance=True,
            ),
            "JSON_ARRAY": lambda self: self.expression(
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "PREDICT": lambda self: self._parse_ml(exp.Predict),
            "TRANSLATE": lambda self: self._parse_translate(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
            "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding),
            "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml(
                exp.GenerateEmbedding, is_text=True
            ),
            "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
            "FORECAST": lambda self: self._parse_ml(exp.MLForecast),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
            TokenType.DECLARE: lambda self: self._parse_declare(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]:
            index = self._index
            this = self._parse_range()
            self._match_text_seq("DO")
            if self._match(TokenType.COMMAND):
                self._retreat(index)
                return self._parse_as_command(self._prev)
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if isinstance(bracket, exp.Array):
                bracket.set("struct_name_inheritance", True)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in MAKE_INTERVAL_KWARGS:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_ml(self, expr_type: t.Type[E], **kwargs) -> E:
            self._match_text_seq("MODEL")
            this = self._parse_table()

            self._match(TokenType.COMMA)
            self._match_text_seq("TABLE")

            # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one
            expression = (
                self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None
            )

            self._match(TokenType.COMMA)

            return self.expression(
                expr_type,
                this=this,
                expression=expression,
                params_struct=self._parse_bitwise(),
                **kwargs,
            )

        def _parse_translate(self) -> exp.Translate | exp.MLTranslate:
            # Check if this is ML.TRANSLATE by looking at previous tokens
            token = seq_get(self._tokens, self._index - 4)
            if token and token.text.upper() == "ML":
                return self._parse_ml(exp.MLTranslate)

            return exp.Translate.from_arg_list(self._parse_function_args())

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            self._match(TokenType.TABLE)
            this = self._parse_table()

            expr = self.expression(exp.FeaturesAtTime, this=this)

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_vector_search(self) -> exp.VectorSearch:
            self._match(TokenType.TABLE)
            base_table = self._parse_table()

            self._match(TokenType.COMMA)

            column_to_search = self._parse_bitwise()
            self._match(TokenType.COMMA)

            self._match(TokenType.TABLE)
            query_table = self._parse_table()

            expr = self.expression(
                exp.VectorSearch,
                this=base_table,
                column_to_search=column_to_search,
                query_table=query_table,
            )

            while self._match(TokenType.COMMA):
                # query_column_to_search can be named argument or positional
                if self._match(TokenType.STRING, advance=False):
                    query_column = self._parse_string()
                    expr.set("query_column_to_search", query_column)
                else:
                    arg = self._parse_lambda()
                    if arg:
                        expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )

        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            this = super()._parse_column_ops(this)

            if isinstance(this, exp.Dot):
                prefix_name = this.this.name.upper()
                func_name = this.name.upper()
                if prefix_name == "NET":
                    if func_name == "HOST":
                        this = self.expression(
                            exp.NetHost, this=seq_get(this.expression.expressions, 0)
                        )
                elif prefix_name == "SAFE":
                    if func_name == "TIMESTAMP":
                        this = _build_timestamp(this.expression.expressions)
                        this.set("safe", True)

            return this

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")

        TS_OR_DS_TYPES = (
            exp.TsOrDsToDatetime,
            exp.TsOrDsToTimestamp,
            exp.TsOrDsToTime,
            exp.TsOrDsToDate,
        )

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayRemove: filter_array_using_unnest,
            exp.BitwiseAndAgg: rename_func("BIT_AND"),
            exp.BitwiseOrAgg: rename_func("BIT_OR"),
            exp.BitwiseXorAgg: rename_func("BIT_XOR"),
            exp.BitwiseCount: rename_func("BIT_COUNT"),
            exp.ByteLength: rename_func("BYTE_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False, sep=None
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONBool: rename_func("BOOL"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: lambda self, e: self.func(
                "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING",
                e.this,
                e.args.get("options"),
            ),
            exp.JSONKeysAtDepth: rename_func("JSON_KEYS"),
            exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.Normalize: lambda self, e: self.func(
                "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE",
                e.this,
                e.args.get("form"),
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this),
            exp.ParseDatetime: lambda self, e: self.func(
                "PARSE_DATETIME", self.format_time(e), e.this
            ),
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2Digest: sha2_digest_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.SessionUser: lambda *_: "SESSION_USER()",
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BLOB: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
            unit = expression.unit
            unit_sql = unit.name if unit.is_string else self.sql(unit)
            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            elif isinstance(this, exp.TsOrDsToTime):
                func_name = "FORMAT_TIME"
            else:
                func_name = "FORMAT_DATE"

            time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)

        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope"))

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def declareitem_sql(self, expression: exp.DeclareItem) -> str:
            variables = self.expressions(expression, "this")
            default = self.sql(expression, "default")
            default = f" DEFAULT {default}" if default else ""
            kind = self.sql(expression, "kind")
            kind = f" {kind}" if kind else ""

            return f"{variables}{kind}{default}"

        def timestamp_sql(self, expression: exp.Timestamp) -> str:
            prefix = "SAFE." if expression.args.get("safe") else ""
            return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))
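For orientation, a minimal usage sketch follows (it is not part of the module source above): it drives this dialect through sqlglot's public parse_one and transpile entry points; the sample query and the DuckDB target are illustrative choices only.

import sqlglot

# Parse BigQuery SQL into a sqlglot AST using the dialect defined above.
ast = sqlglot.parse_one("SELECT TO_HEX(MD5('a')) AS h", read="bigquery")
print(ast.sql(dialect="bigquery"))

# Transpile the same query to another dialect; transpile() returns a list of SQL strings.
print(sqlglot.transpile("SELECT TO_HEX(MD5('a')) AS h", read="bigquery", write="duckdb")[0])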
"FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 673 "FROM_HEX": exp.Unhex.from_arg_list, 674 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 675 } 676 # Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function 677 FUNCTIONS.pop("SEARCH") 678 679 FUNCTION_PARSERS = { 680 **parser.Parser.FUNCTION_PARSERS, 681 "ARRAY": lambda self: self.expression( 682 exp.Array, 683 expressions=[self._parse_statement()], 684 struct_name_inheritance=True, 685 ), 686 "JSON_ARRAY": lambda self: self.expression( 687 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 688 ), 689 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 690 "PREDICT": lambda self: self._parse_ml(exp.Predict), 691 "TRANSLATE": lambda self: self._parse_translate(), 692 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 693 "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding), 694 "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml( 695 exp.GenerateEmbedding, is_text=True 696 ), 697 "VECTOR_SEARCH": lambda self: self._parse_vector_search(), 698 "FORECAST": lambda self: self._parse_ml(exp.MLForecast), 699 } 700 FUNCTION_PARSERS.pop("TRIM") 701 702 NO_PAREN_FUNCTIONS = { 703 **parser.Parser.NO_PAREN_FUNCTIONS, 704 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 705 } 706 707 NESTED_TYPE_TOKENS = { 708 *parser.Parser.NESTED_TYPE_TOKENS, 709 TokenType.TABLE, 710 } 711 712 PROPERTY_PARSERS = { 713 **parser.Parser.PROPERTY_PARSERS, 714 "NOT DETERMINISTIC": lambda self: self.expression( 715 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 716 ), 717 "OPTIONS": lambda self: self._parse_with_property(), 718 } 719 720 CONSTRAINT_PARSERS = { 721 **parser.Parser.CONSTRAINT_PARSERS, 722 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 723 } 724 725 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 726 RANGE_PARSERS.pop(TokenType.OVERLAPS) 727 728 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 729 730 STATEMENT_PARSERS = { 731 **parser.Parser.STATEMENT_PARSERS, 732 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 733 TokenType.END: lambda self: self._parse_as_command(self._prev), 734 TokenType.FOR: lambda self: self._parse_for_in(), 735 TokenType.EXPORT: lambda self: self._parse_export_data(), 736 TokenType.DECLARE: lambda self: self._parse_declare(), 737 } 738 739 BRACKET_OFFSETS = { 740 "OFFSET": (0, False), 741 "ORDINAL": (1, False), 742 "SAFE_OFFSET": (0, True), 743 "SAFE_ORDINAL": (1, True), 744 } 745 746 def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]: 747 index = self._index 748 this = self._parse_range() 749 self._match_text_seq("DO") 750 if self._match(TokenType.COMMAND): 751 self._retreat(index) 752 return self._parse_as_command(self._prev) 753 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 754 755 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 756 this = super()._parse_table_part(schema=schema) or self._parse_number() 757 758 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 759 if isinstance(this, exp.Identifier): 760 table_name = this.name 761 while self._match(TokenType.DASH, advance=False) and self._next: 762 start = self._curr 763 while self._is_connected() and not self._match_set( 764 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 765 ): 766 self._advance() 767 768 if start == self._curr: 769 break 770 771 
table_name += self._find_sql(start, self._prev) 772 773 this = exp.Identifier( 774 this=table_name, quoted=this.args.get("quoted") 775 ).update_positions(this) 776 elif isinstance(this, exp.Literal): 777 table_name = this.name 778 779 if self._is_connected() and self._parse_var(any_token=True): 780 table_name += self._prev.text 781 782 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 783 784 return this 785 786 def _parse_table_parts( 787 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 788 ) -> exp.Table: 789 table = super()._parse_table_parts( 790 schema=schema, is_db_reference=is_db_reference, wildcard=True 791 ) 792 793 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 794 if not table.catalog: 795 if table.db: 796 previous_db = table.args["db"] 797 parts = table.db.split(".") 798 if len(parts) == 2 and not table.args["db"].quoted: 799 table.set( 800 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 801 ) 802 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 803 else: 804 previous_this = table.this 805 parts = table.name.split(".") 806 if len(parts) == 2 and not table.this.quoted: 807 table.set( 808 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 809 ) 810 table.set( 811 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 812 ) 813 814 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 815 alias = table.this 816 catalog, db, this, *rest = ( 817 exp.to_identifier(p, quoted=True) 818 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 819 ) 820 821 for part in (catalog, db, this): 822 if part: 823 part.update_positions(table.this) 824 825 if rest and this: 826 this = exp.Dot.build([this, *rest]) # type: ignore 827 828 table = exp.Table( 829 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 830 ) 831 table.meta["quoted_table"] = True 832 else: 833 alias = None 834 835 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 836 # dataset, so if the project identifier is omitted we need to fix the ast so that 837 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 838 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 839 # views, because it would seem like the "catalog" part is set, when it'd actually 840 # be the region/dataset. Merging the two identifiers into a single one is done to 841 # avoid producing a 4-part Table reference, which would cause issues in the schema 842 # module, when there are 3-part table names mixed with information schema views. 843 # 844 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 845 table_parts = table.parts 846 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 847 # We need to alias the table here to avoid breaking existing qualified columns. 848 # This is expected to be safe, because if there's an actual alias coming up in 849 # the token stream, it will overwrite this one. If there isn't one, we are only 850 # exposing the name that can be used to reference the view explicitly (a no-op). 
851 exp.alias_( 852 table, 853 t.cast(exp.Identifier, alias or table_parts[-1]), 854 table=True, 855 copy=False, 856 ) 857 858 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 859 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 860 line=table_parts[-2].meta.get("line"), 861 col=table_parts[-1].meta.get("col"), 862 start=table_parts[-2].meta.get("start"), 863 end=table_parts[-1].meta.get("end"), 864 ) 865 table.set("this", new_this) 866 table.set("db", seq_get(table_parts, -3)) 867 table.set("catalog", seq_get(table_parts, -4)) 868 869 return table 870 871 def _parse_column(self) -> t.Optional[exp.Expression]: 872 column = super()._parse_column() 873 if isinstance(column, exp.Column): 874 parts = column.parts 875 if any("." in p.name for p in parts): 876 catalog, db, table, this, *rest = ( 877 exp.to_identifier(p, quoted=True) 878 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 879 ) 880 881 if rest and this: 882 this = exp.Dot.build([this, *rest]) # type: ignore 883 884 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 885 column.meta["quoted_column"] = True 886 887 return column 888 889 @t.overload 890 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 891 892 @t.overload 893 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 894 895 def _parse_json_object(self, agg=False): 896 json_object = super()._parse_json_object() 897 array_kv_pair = seq_get(json_object.expressions, 0) 898 899 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 900 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 901 if ( 902 array_kv_pair 903 and isinstance(array_kv_pair.this, exp.Array) 904 and isinstance(array_kv_pair.expression, exp.Array) 905 ): 906 keys = array_kv_pair.this.expressions 907 values = array_kv_pair.expression.expressions 908 909 json_object.set( 910 "expressions", 911 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 912 ) 913 914 return json_object 915 916 def _parse_bracket( 917 self, this: t.Optional[exp.Expression] = None 918 ) -> t.Optional[exp.Expression]: 919 bracket = super()._parse_bracket(this) 920 921 if isinstance(bracket, exp.Array): 922 bracket.set("struct_name_inheritance", True) 923 924 if this is bracket: 925 return bracket 926 927 if isinstance(bracket, exp.Bracket): 928 for expression in bracket.expressions: 929 name = expression.name.upper() 930 931 if name not in self.BRACKET_OFFSETS: 932 break 933 934 offset, safe = self.BRACKET_OFFSETS[name] 935 bracket.set("offset", offset) 936 bracket.set("safe", safe) 937 expression.replace(expression.expressions[0]) 938 939 return bracket 940 941 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 942 unnest = super()._parse_unnest(with_alias=with_alias) 943 944 if not unnest: 945 return None 946 947 unnest_expr = seq_get(unnest.expressions, 0) 948 if unnest_expr: 949 from sqlglot.optimizer.annotate_types import annotate_types 950 951 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 952 953 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 954 # in contrast to other dialects such as DuckDB which flattens only the array by default 955 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 956 array_elem.is_type(exp.DataType.Type.STRUCT) 957 for array_elem in unnest_expr._type.expressions 958 ): 959 unnest.set("explode_array", True) 
960 961 return unnest 962 963 def _parse_make_interval(self) -> exp.MakeInterval: 964 expr = exp.MakeInterval() 965 966 for arg_key in MAKE_INTERVAL_KWARGS: 967 value = self._parse_lambda() 968 969 if not value: 970 break 971 972 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 973 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 974 if isinstance(value, exp.Kwarg): 975 arg_key = value.this.name 976 977 expr.set(arg_key, value) 978 979 self._match(TokenType.COMMA) 980 981 return expr 982 983 def _parse_ml(self, expr_type: t.Type[E], **kwargs) -> E: 984 self._match_text_seq("MODEL") 985 this = self._parse_table() 986 987 self._match(TokenType.COMMA) 988 self._match_text_seq("TABLE") 989 990 # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one 991 expression = ( 992 self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None 993 ) 994 995 self._match(TokenType.COMMA) 996 997 return self.expression( 998 expr_type, 999 this=this, 1000 expression=expression, 1001 params_struct=self._parse_bitwise(), 1002 **kwargs, 1003 ) 1004 1005 def _parse_translate(self) -> exp.Translate | exp.MLTranslate: 1006 # Check if this is ML.TRANSLATE by looking at previous tokens 1007 token = seq_get(self._tokens, self._index - 4) 1008 if token and token.text.upper() == "ML": 1009 return self._parse_ml(exp.MLTranslate) 1010 1011 return exp.Translate.from_arg_list(self._parse_function_args()) 1012 1013 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 1014 self._match(TokenType.TABLE) 1015 this = self._parse_table() 1016 1017 expr = self.expression(exp.FeaturesAtTime, this=this) 1018 1019 while self._match(TokenType.COMMA): 1020 arg = self._parse_lambda() 1021 1022 # Get the LHS of the Kwarg and set the arg to that value, e.g 1023 # "num_rows => 1" sets the expr's `num_rows` arg 1024 if arg: 1025 expr.set(arg.this.name, arg) 1026 1027 return expr 1028 1029 def _parse_vector_search(self) -> exp.VectorSearch: 1030 self._match(TokenType.TABLE) 1031 base_table = self._parse_table() 1032 1033 self._match(TokenType.COMMA) 1034 1035 column_to_search = self._parse_bitwise() 1036 self._match(TokenType.COMMA) 1037 1038 self._match(TokenType.TABLE) 1039 query_table = self._parse_table() 1040 1041 expr = self.expression( 1042 exp.VectorSearch, 1043 this=base_table, 1044 column_to_search=column_to_search, 1045 query_table=query_table, 1046 ) 1047 1048 while self._match(TokenType.COMMA): 1049 # query_column_to_search can be named argument or positional 1050 if self._match(TokenType.STRING, advance=False): 1051 query_column = self._parse_string() 1052 expr.set("query_column_to_search", query_column) 1053 else: 1054 arg = self._parse_lambda() 1055 if arg: 1056 expr.set(arg.this.name, arg) 1057 1058 return expr 1059 1060 def _parse_export_data(self) -> exp.Export: 1061 self._match_text_seq("DATA") 1062 1063 return self.expression( 1064 exp.Export, 1065 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 1066 options=self._parse_properties(), 1067 this=self._match_text_seq("AS") and self._parse_select(), 1068 ) 1069 1070 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 1071 this = super()._parse_column_ops(this) 1072 1073 if isinstance(this, exp.Dot): 1074 prefix_name = this.this.name.upper() 1075 func_name = this.name.upper() 1076 if prefix_name == "NET": 1077 if func_name == "HOST": 1078 this = self.expression( 1079 exp.NetHost, 
this=seq_get(this.expression.expressions, 0) 1080 ) 1081 elif prefix_name == "SAFE": 1082 if func_name == "TIMESTAMP": 1083 this = _build_timestamp(this.expression.expressions) 1084 this.set("safe", True) 1085 1086 return this 1087 1088 class Generator(generator.Generator): 1089 INTERVAL_ALLOWS_PLURAL_FORM = False 1090 JOIN_HINTS = False 1091 QUERY_HINTS = False 1092 TABLE_HINTS = False 1093 LIMIT_FETCH = "LIMIT" 1094 RENAME_TABLE_WITH_DB = False 1095 NVL2_SUPPORTED = False 1096 UNNEST_WITH_ORDINALITY = False 1097 COLLATE_IS_FUNC = True 1098 LIMIT_ONLY_LITERALS = True 1099 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1100 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1101 JSON_KEY_VALUE_PAIR_SEP = "," 1102 NULL_ORDERING_SUPPORTED = False 1103 IGNORE_NULLS_IN_FUNC = True 1104 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1105 CAN_IMPLEMENT_ARRAY_ANY = True 1106 SUPPORTS_TO_NUMBER = False 1107 NAMED_PLACEHOLDER_TOKEN = "@" 1108 HEX_FUNC = "TO_HEX" 1109 WITH_PROPERTIES_PREFIX = "OPTIONS" 1110 SUPPORTS_EXPLODING_PROJECTIONS = False 1111 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1112 SUPPORTS_UNIX_SECONDS = True 1113 1114 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1115 1116 TS_OR_DS_TYPES = ( 1117 exp.TsOrDsToDatetime, 1118 exp.TsOrDsToTimestamp, 1119 exp.TsOrDsToTime, 1120 exp.TsOrDsToDate, 1121 ) 1122 1123 TRANSFORMS = { 1124 **generator.Generator.TRANSFORMS, 1125 exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"), 1126 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1127 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1128 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1129 exp.Array: inline_array_unless_query, 1130 exp.ArrayContains: _array_contains_sql, 1131 exp.ArrayFilter: filter_array_using_unnest, 1132 exp.ArrayRemove: filter_array_using_unnest, 1133 exp.BitwiseAndAgg: rename_func("BIT_AND"), 1134 exp.BitwiseOrAgg: rename_func("BIT_OR"), 1135 exp.BitwiseXorAgg: rename_func("BIT_XOR"), 1136 exp.BitwiseCount: rename_func("BIT_COUNT"), 1137 exp.ByteLength: rename_func("BYTE_LENGTH"), 1138 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1139 exp.CollateProperty: lambda self, e: ( 1140 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1141 if e.args.get("default") 1142 else f"COLLATE {self.sql(e, 'this')}" 1143 ), 1144 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1145 exp.CountIf: rename_func("COUNTIF"), 1146 exp.Create: _create_sql, 1147 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1148 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1149 exp.DateDiff: lambda self, e: self.func( 1150 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1151 ), 1152 exp.DateFromParts: rename_func("DATE"), 1153 exp.DateStrToDate: datestrtodate_sql, 1154 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1155 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1156 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1157 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1158 exp.FromTimeZone: lambda self, e: self.func( 1159 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1160 ), 1161 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1162 exp.GroupConcat: lambda self, e: groupconcat_sql( 1163 self, e, func_name="STRING_AGG", within_group=False, sep=None 1164 ), 1165 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1166 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1167 exp.If: if_sql(false_value="NULL"), 1168 exp.ILike: no_ilike_sql, 1169 
exp.IntDiv: rename_func("DIV"), 1170 exp.Int64: rename_func("INT64"), 1171 exp.JSONBool: rename_func("BOOL"), 1172 exp.JSONExtract: _json_extract_sql, 1173 exp.JSONExtractArray: _json_extract_sql, 1174 exp.JSONExtractScalar: _json_extract_sql, 1175 exp.JSONFormat: lambda self, e: self.func( 1176 "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING", 1177 e.this, 1178 e.args.get("options"), 1179 ), 1180 exp.JSONKeysAtDepth: rename_func("JSON_KEYS"), 1181 exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"), 1182 exp.Levenshtein: _levenshtein_sql, 1183 exp.Max: max_or_greatest, 1184 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1185 exp.MD5Digest: rename_func("MD5"), 1186 exp.Min: min_or_least, 1187 exp.Normalize: lambda self, e: self.func( 1188 "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE", 1189 e.this, 1190 e.args.get("form"), 1191 ), 1192 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1193 exp.RegexpExtract: lambda self, e: self.func( 1194 "REGEXP_EXTRACT", 1195 e.this, 1196 e.expression, 1197 e.args.get("position"), 1198 e.args.get("occurrence"), 1199 ), 1200 exp.RegexpExtractAll: lambda self, e: self.func( 1201 "REGEXP_EXTRACT_ALL", e.this, e.expression 1202 ), 1203 exp.RegexpReplace: regexp_replace_sql, 1204 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1205 exp.ReturnsProperty: _returnsproperty_sql, 1206 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1207 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1208 exp.ParseDatetime: lambda self, e: self.func( 1209 "PARSE_DATETIME", self.format_time(e), e.this 1210 ), 1211 exp.Select: transforms.preprocess( 1212 [ 1213 transforms.explode_projection_to_unnest(), 1214 transforms.unqualify_unnest, 1215 transforms.eliminate_distinct_on, 1216 _alias_ordered_group, 1217 transforms.eliminate_semi_and_anti_joins, 1218 ] 1219 ), 1220 exp.SHA: rename_func("SHA1"), 1221 exp.SHA2: sha256_sql, 1222 exp.SHA1Digest: rename_func("SHA1"), 1223 exp.SHA2Digest: sha2_digest_sql, 1224 exp.StabilityProperty: lambda self, e: ( 1225 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1226 ), 1227 exp.String: rename_func("STRING"), 1228 exp.StrPosition: lambda self, e: ( 1229 strposition_sql( 1230 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1231 ) 1232 ), 1233 exp.StrToDate: _str_to_datetime_sql, 1234 exp.StrToTime: _str_to_datetime_sql, 1235 exp.SessionUser: lambda *_: "SESSION_USER()", 1236 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1237 exp.TimeFromParts: rename_func("TIME"), 1238 exp.TimestampFromParts: rename_func("DATETIME"), 1239 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1240 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1241 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1242 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1243 exp.TimeStrToTime: timestrtotime_sql, 1244 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1245 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1246 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1247 exp.TsOrDsToTime: rename_func("TIME"), 1248 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1249 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1250 exp.Unhex: rename_func("FROM_HEX"), 1251 exp.UnixDate: rename_func("UNIX_DATE"), 1252 exp.UnixToTime: _unix_to_time_sql, 1253 exp.Uuid: lambda *_: "GENERATE_UUID()", 1254 exp.Values: _derived_table_values_to_unnest, 1255 exp.VariancePop: rename_func("VAR_POP"), 1256 exp.SafeDivide: 
rename_func("SAFE_DIVIDE"), 1257 } 1258 1259 SUPPORTED_JSON_PATH_PARTS = { 1260 exp.JSONPathKey, 1261 exp.JSONPathRoot, 1262 exp.JSONPathSubscript, 1263 } 1264 1265 TYPE_MAPPING = { 1266 **generator.Generator.TYPE_MAPPING, 1267 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1268 exp.DataType.Type.BIGINT: "INT64", 1269 exp.DataType.Type.BINARY: "BYTES", 1270 exp.DataType.Type.BLOB: "BYTES", 1271 exp.DataType.Type.BOOLEAN: "BOOL", 1272 exp.DataType.Type.CHAR: "STRING", 1273 exp.DataType.Type.DECIMAL: "NUMERIC", 1274 exp.DataType.Type.DOUBLE: "FLOAT64", 1275 exp.DataType.Type.FLOAT: "FLOAT64", 1276 exp.DataType.Type.INT: "INT64", 1277 exp.DataType.Type.NCHAR: "STRING", 1278 exp.DataType.Type.NVARCHAR: "STRING", 1279 exp.DataType.Type.SMALLINT: "INT64", 1280 exp.DataType.Type.TEXT: "STRING", 1281 exp.DataType.Type.TIMESTAMP: "DATETIME", 1282 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1283 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1284 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1285 exp.DataType.Type.TINYINT: "INT64", 1286 exp.DataType.Type.ROWVERSION: "BYTES", 1287 exp.DataType.Type.UUID: "STRING", 1288 exp.DataType.Type.VARBINARY: "BYTES", 1289 exp.DataType.Type.VARCHAR: "STRING", 1290 exp.DataType.Type.VARIANT: "ANY TYPE", 1291 } 1292 1293 PROPERTIES_LOCATION = { 1294 **generator.Generator.PROPERTIES_LOCATION, 1295 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1296 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1297 } 1298 1299 # WINDOW comes after QUALIFY 1300 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1301 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1302 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1303 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1304 } 1305 1306 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1307 RESERVED_KEYWORDS = { 1308 "all", 1309 "and", 1310 "any", 1311 "array", 1312 "as", 1313 "asc", 1314 "assert_rows_modified", 1315 "at", 1316 "between", 1317 "by", 1318 "case", 1319 "cast", 1320 "collate", 1321 "contains", 1322 "create", 1323 "cross", 1324 "cube", 1325 "current", 1326 "default", 1327 "define", 1328 "desc", 1329 "distinct", 1330 "else", 1331 "end", 1332 "enum", 1333 "escape", 1334 "except", 1335 "exclude", 1336 "exists", 1337 "extract", 1338 "false", 1339 "fetch", 1340 "following", 1341 "for", 1342 "from", 1343 "full", 1344 "group", 1345 "grouping", 1346 "groups", 1347 "hash", 1348 "having", 1349 "if", 1350 "ignore", 1351 "in", 1352 "inner", 1353 "intersect", 1354 "interval", 1355 "into", 1356 "is", 1357 "join", 1358 "lateral", 1359 "left", 1360 "like", 1361 "limit", 1362 "lookup", 1363 "merge", 1364 "natural", 1365 "new", 1366 "no", 1367 "not", 1368 "null", 1369 "nulls", 1370 "of", 1371 "on", 1372 "or", 1373 "order", 1374 "outer", 1375 "over", 1376 "partition", 1377 "preceding", 1378 "proto", 1379 "qualify", 1380 "range", 1381 "recursive", 1382 "respect", 1383 "right", 1384 "rollup", 1385 "rows", 1386 "select", 1387 "set", 1388 "some", 1389 "struct", 1390 "tablesample", 1391 "then", 1392 "to", 1393 "treat", 1394 "true", 1395 "unbounded", 1396 "union", 1397 "unnest", 1398 "using", 1399 "when", 1400 "where", 1401 "window", 1402 "with", 1403 "within", 1404 } 1405 1406 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1407 unit = expression.unit 1408 unit_sql = unit.name if unit.is_string else self.sql(unit) 1409 return self.func("DATE_TRUNC", expression.this, unit_sql, 
expression.args.get("zone")) 1410 1411 def mod_sql(self, expression: exp.Mod) -> str: 1412 this = expression.this 1413 expr = expression.expression 1414 return self.func( 1415 "MOD", 1416 this.unnest() if isinstance(this, exp.Paren) else this, 1417 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1418 ) 1419 1420 def column_parts(self, expression: exp.Column) -> str: 1421 if expression.meta.get("quoted_column"): 1422 # If a column reference is of the form `dataset.table`.name, we need 1423 # to preserve the quoted table path, otherwise the reference breaks 1424 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1425 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1426 return f"{table_path}.{self.sql(expression, 'this')}" 1427 1428 return super().column_parts(expression) 1429 1430 def table_parts(self, expression: exp.Table) -> str: 1431 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1432 # we need to make sure the correct quoting is used in each case. 1433 # 1434 # For example, if there is a CTE x that clashes with a schema name, then the former will 1435 # return the table y in that schema, whereas the latter will return the CTE's y column: 1436 # 1437 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1438 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1439 if expression.meta.get("quoted_table"): 1440 table_parts = ".".join(p.name for p in expression.parts) 1441 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1442 1443 return super().table_parts(expression) 1444 1445 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1446 this = expression.this 1447 if isinstance(this, exp.TsOrDsToDatetime): 1448 func_name = "FORMAT_DATETIME" 1449 elif isinstance(this, exp.TsOrDsToTimestamp): 1450 func_name = "FORMAT_TIMESTAMP" 1451 elif isinstance(this, exp.TsOrDsToTime): 1452 func_name = "FORMAT_TIME" 1453 else: 1454 func_name = "FORMAT_DATE" 1455 1456 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1457 return self.func( 1458 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1459 ) 1460 1461 def eq_sql(self, expression: exp.EQ) -> str: 1462 # Operands of = cannot be NULL in BigQuery 1463 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1464 if not isinstance(expression.parent, exp.Update): 1465 return "NULL" 1466 1467 return self.binary(expression, "=") 1468 1469 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1470 parent = expression.parent 1471 1472 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1473 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1474 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1475 return self.func( 1476 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1477 ) 1478 1479 return super().attimezone_sql(expression) 1480 1481 def trycast_sql(self, expression: exp.TryCast) -> str: 1482 return self.cast_sql(expression, safe_prefix="SAFE_") 1483 1484 def bracket_sql(self, expression: exp.Bracket) -> str: 1485 this = expression.this 1486 expressions = expression.expressions 1487 1488 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1489 arg = expressions[0] 1490 if arg.type is None: 1491 from sqlglot.optimizer.annotate_types import annotate_types 1492 1493 arg = annotate_types(arg, dialect=self.dialect) 1494 1495 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1496 # BQ doesn't support bracket syntax with string values for structs 1497 return f"{self.sql(this)}.{arg.name}" 1498 1499 expressions_sql = self.expressions(expression, flat=True) 1500 offset = expression.args.get("offset") 1501 1502 if offset == 0: 1503 expressions_sql = f"OFFSET({expressions_sql})" 1504 elif offset == 1: 1505 expressions_sql = f"ORDINAL({expressions_sql})" 1506 elif offset is not None: 1507 self.unsupported(f"Unsupported array offset: {offset}") 1508 1509 if expression.args.get("safe"): 1510 expressions_sql = f"SAFE_{expressions_sql}" 1511 1512 return f"{self.sql(this)}[{expressions_sql}]" 1513 1514 def in_unnest_op(self, expression: exp.Unnest) -> str: 1515 return self.sql(expression) 1516 1517 def version_sql(self, expression: exp.Version) -> str: 1518 if expression.name == "TIMESTAMP": 1519 expression.set("this", "SYSTEM_TIME") 1520 return super().version_sql(expression) 1521 1522 def contains_sql(self, expression: exp.Contains) -> str: 1523 this = expression.this 1524 expr = expression.expression 1525 1526 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1527 this = this.this 1528 expr = expr.this 1529 1530 return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1531 1532 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1533 this = expression.this 1534 1535 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1536 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1537 # because they aren't literals and so the above syntax is invalid BigQuery. 1538 if isinstance(this, exp.Array): 1539 elem = seq_get(this.expressions, 0) 1540 if not (elem and elem.find(exp.Query)): 1541 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1542 1543 return super().cast_sql(expression, safe_prefix=safe_prefix) 1544 1545 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1546 variables = self.expressions(expression, "this") 1547 default = self.sql(expression, "default") 1548 default = f" DEFAULT {default}" if default else "" 1549 kind = self.sql(expression, "kind") 1550 kind = f" {kind}" if kind else "" 1551 1552 return f"{variables}{kind}{default}" 1553 1554 def timestamp_sql(self, expression: exp.Timestamp) -> str: 1555 prefix = "SAFE." if expression.args.get("safe") else "" 1556 return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
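For illustration (a minimal sketch, not part of the module), transpiling a LOG call from a dialect whose LOG takes the base first into BigQuery swaps the arguments, since BigQuery expects the base second:

    import sqlglot

    # Spark's LOG takes the base first; BigQuery takes it second (LOG_BASE_FIRST = False),
    # so the arguments are expected to be swapped, e.g. SELECT LOG(100, 10).
    print(sqlglot.transpile("SELECT LOG(10, 100)", read="spark", write="bigquery")[0])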
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
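A minimal sketch of the BigQuery behavior using sqlglot's qualification step (illustrative only; the exact qualified output depends on the sqlglot version):

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = (
        "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
        "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
    )
    # For BigQuery, the alias references in GROUP BY/HAVING are expanded back to the
    # underlying `id` column, while the WHERE clause keeps referring to `data.my_id`.
    print(qualify(parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))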
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Whether byte string literals (ex: BigQuery's b'...') are typed as BYTES/BINARY
Whether to annotate all scopes during optimization. Used by BigQuery for UNNEST support.
Whether projection alias names can shadow table/source names in GROUP BY and HAVING clauses.
In BigQuery, when a projection alias has the same name as a source table, the alias takes precedence in GROUP BY and HAVING clauses, and the table becomes inaccessible by that name.
For example, in BigQuery: SELECT id, ARRAY_AGG(col) AS custom_fields FROM custom_fields GROUP BY id HAVING id >= 1
The "custom_fields" source is shadowed by the projection alias, so we cannot qualify "id" with "custom_fields" in GROUP BY/HAVING.
Whether table names can be referenced as columns (treated as structs).
BigQuery allows tables to be referenced as columns in queries, automatically treating them as struct values containing all the table's columns.
For example, in BigQuery: SELECT t FROM my_table AS t -- Returns entire row as a struct
Whether the dialect supports expanding struct fields using star notation (e.g., struct_col.*).
BigQuery allows struct fields to be expanded with the star operator:
SELECT t.struct_col.* FROM table t
RisingWave also allows struct field expansion with the star operator using parentheses:
SELECT (t.struct_col).* FROM table t
This expands to all fields within the struct.
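For illustration, a sketch of struct star expansion during qualification; the table name tbl and the struct layout are assumptions made for the example:

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    schema = {"tbl": {"struct_col": "STRUCT<a INT64, b INT64>"}}  # hypothetical schema
    # t.struct_col.* is expected to expand to the individual struct fields (a and b).
    print(
        qualify(
            parse_one("SELECT t.struct_col.* FROM tbl AS t", read="bigquery"),
            dialect="bigquery",
            schema=schema,
        ).sql("bigquery")
    )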
Whether pseudocolumns should be excluded from star expansion (SELECT *).
Pseudocolumns are special dialect-specific columns (e.g., Oracle's ROWNUM, ROWID, LEVEL, or BigQuery's _PARTITIONTIME, _PARTITIONDATE) that are implicitly available but not part of the table schema. When this is True, SELECT * will not include these pseudocolumns; they must be explicitly selected.
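A small, hedged sketch: even when a schema also declares the _PARTITIONTIME pseudocolumn, star expansion for BigQuery is expected to leave it out (the table name tbl is hypothetical):

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    schema = {"tbl": {"a": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}  # hypothetical schema
    # SELECT * should expand only to `a`; the pseudocolumn must be selected explicitly.
    print(
        qualify(parse_one("SELECT * FROM tbl", read="bigquery"), dialect="bigquery", schema=schema)
        .sql("bigquery")
    )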
Whether query results are typed as structs in metadata for type inference.
In BigQuery, subqueries store their column types as a STRUCT in metadata,
enabling special type inference for ARRAY(SELECT ...) expressions:
ARRAY(SELECT x, y FROM t) → ARRAY<STRUCT<x, y>>. For single-column subqueries, BigQuery unwraps the struct:
ARRAY(SELECT x FROM t) → ARRAY<x>. This is metadata-only for type inference.
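A sketch of the resulting type inference (exact type names depend on the sqlglot version):

    from sqlglot import parse_one
    from sqlglot.optimizer.annotate_types import annotate_types

    expr = annotate_types(
        parse_one("SELECT ARRAY(SELECT 1 AS x) AS xs", read="bigquery"),
        dialect="bigquery",
    )
    # The single-column subquery is unwrapped, so the projection is expected to be typed
    # as an ARRAY of INT64/BIGINT rather than an ARRAY of STRUCT.
    print(expr.selects[0].type)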
Whether JSON_EXTRACT_SCALAR returns null if a non-scalar value is selected.
Whether LEAST/GREATEST functions ignore NULL values, e.g.:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
The default type of NULL for producing the correct projection type.
For example, in BigQuery the default type of the NULL value is INT64.
Whether to prioritize non-literal types over literals during type annotation.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
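For example (a hedged sketch; the exact output depends on the target dialect and sqlglot version), BigQuery's %x element is translated through this mapping when a formatted-date call is transpiled:

    import sqlglot

    # %x is mapped to %m/%d/%y when the format string is converted for the target dialect.
    print(
        sqlglot.transpile(
            "SELECT FORMAT_DATE('%x', DATE '2025-01-01')",
            read="bigquery",
            write="duckdb",
        )[0]
    )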
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
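A short sketch: a bare UNION read from another dialect is expected to gain an explicit DISTINCT when generated for BigQuery, since the specifier is required:

    import sqlglot

    # Expected to print something like: SELECT 1 UNION DISTINCT SELECT 2
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="duckdb", write="bigquery")[0])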
473 def normalize_identifier(self, expression: E) -> E: 474 if ( 475 isinstance(expression, exp.Identifier) 476 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 477 ): 478 parent = expression.parent 479 while isinstance(parent, exp.Dot): 480 parent = parent.parent 481 482 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 483 # by default. The following check uses a heuristic to detect tables based on whether 484 # they are qualified. This should generally be correct, because tables in BigQuery 485 # must be qualified with at least a dataset, unless @@dataset_id is set. 486 case_sensitive = ( 487 isinstance(parent, exp.UserDefinedFunction) 488 or ( 489 isinstance(parent, exp.Table) 490 and parent.db 491 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 492 ) 493 or expression.meta.get("is_table") 494 ) 495 if not case_sensitive: 496 expression.set("this", expression.this.lower()) 497 498 return t.cast(E, expression) 499 500 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
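A minimal usage sketch for BigQuery's rules: a bare, unquoted identifier is lowercased because it is resolved case-insensitively:

    from sqlglot import exp
    from sqlglot.dialects.bigquery import BigQuery

    dialect = BigQuery()
    # Unquoted identifiers that are not table/UDF names are lowercased (case-insensitive).
    print(dialect.normalize_identifier(exp.to_identifier("FoO")).sql("bigquery"))  # foo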
Mapping of an escaped sequence (\n) to its unescaped version (an actual newline character, in this example).
502 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 503 VAR_TOKENS = { 504 TokenType.DASH, 505 TokenType.VAR, 506 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
508 class Tokenizer(tokens.Tokenizer): 509 QUOTES = ["'", '"', '"""', "'''"] 510 COMMENTS = ["--", "#", ("/*", "*/")] 511 IDENTIFIERS = ["`"] 512 STRING_ESCAPES = ["\\"] 513 514 HEX_STRINGS = [("0x", ""), ("0X", "")] 515 516 BYTE_STRINGS = [ 517 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 518 ] 519 520 RAW_STRINGS = [ 521 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 522 ] 523 524 NESTED_COMMENTS = False 525 526 KEYWORDS = { 527 **tokens.Tokenizer.KEYWORDS, 528 "ANY TYPE": TokenType.VARIANT, 529 "BEGIN": TokenType.COMMAND, 530 "BEGIN TRANSACTION": TokenType.BEGIN, 531 "BYTEINT": TokenType.INT, 532 "BYTES": TokenType.BINARY, 533 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 534 "DATETIME": TokenType.TIMESTAMP, 535 "DECLARE": TokenType.DECLARE, 536 "ELSEIF": TokenType.COMMAND, 537 "EXCEPTION": TokenType.COMMAND, 538 "EXPORT": TokenType.EXPORT, 539 "FLOAT64": TokenType.DOUBLE, 540 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 541 "LOOP": TokenType.COMMAND, 542 "MODEL": TokenType.MODEL, 543 "NOT DETERMINISTIC": TokenType.VOLATILE, 544 "RECORD": TokenType.STRUCT, 545 "REPEAT": TokenType.COMMAND, 546 "TIMESTAMP": TokenType.TIMESTAMPTZ, 547 "WHILE": TokenType.COMMAND, 548 } 549 KEYWORDS.pop("DIV") 550 KEYWORDS.pop("VALUES") 551 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
553 class Parser(parser.Parser): 554 PREFIXED_PIVOT_COLUMNS = True 555 LOG_DEFAULTS_TO_LN = True 556 SUPPORTS_IMPLICIT_UNNEST = True 557 JOINS_HAVE_EQUAL_PRECEDENCE = True 558 559 # BigQuery does not allow ASC/DESC to be used as an identifier, allows GRANT as an identifier 560 ID_VAR_TOKENS = { 561 *parser.Parser.ID_VAR_TOKENS, 562 TokenType.GRANT, 563 } - {TokenType.ASC, TokenType.DESC} 564 565 ALIAS_TOKENS = { 566 *parser.Parser.ALIAS_TOKENS, 567 TokenType.GRANT, 568 } - {TokenType.ASC, TokenType.DESC} 569 570 TABLE_ALIAS_TOKENS = { 571 *parser.Parser.TABLE_ALIAS_TOKENS, 572 TokenType.GRANT, 573 } - {TokenType.ASC, TokenType.DESC} 574 575 COMMENT_TABLE_ALIAS_TOKENS = { 576 *parser.Parser.COMMENT_TABLE_ALIAS_TOKENS, 577 TokenType.GRANT, 578 } - {TokenType.ASC, TokenType.DESC} 579 580 UPDATE_ALIAS_TOKENS = { 581 *parser.Parser.UPDATE_ALIAS_TOKENS, 582 TokenType.GRANT, 583 } - {TokenType.ASC, TokenType.DESC} 584 585 FUNCTIONS = { 586 **parser.Parser.FUNCTIONS, 587 "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list, 588 "BIT_AND": exp.BitwiseAndAgg.from_arg_list, 589 "BIT_OR": exp.BitwiseOrAgg.from_arg_list, 590 "BIT_XOR": exp.BitwiseXorAgg.from_arg_list, 591 "BIT_COUNT": exp.BitwiseCount.from_arg_list, 592 "BOOL": exp.JSONBool.from_arg_list, 593 "CONTAINS_SUBSTR": _build_contains_substring, 594 "DATE": _build_date, 595 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 596 "DATE_DIFF": build_date_diff, 597 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 598 "DATE_TRUNC": lambda args: exp.DateTrunc( 599 unit=seq_get(args, 1), 600 this=seq_get(args, 0), 601 zone=seq_get(args, 2), 602 ), 603 "DATETIME": _build_datetime, 604 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 605 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 606 "DIV": binary_from_function(exp.IntDiv), 607 "EDIT_DISTANCE": _build_levenshtein, 608 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 609 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 610 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 611 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 612 "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 613 "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list, 614 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 615 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 616 "JSON_STRIP_NULLS": _build_json_strip_nulls, 617 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 618 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 619 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 620 "MD5": exp.MD5Digest.from_arg_list, 621 "SHA1": exp.SHA1Digest.from_arg_list, 622 "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize( 623 this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True 624 ), 625 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 626 "TO_HEX": _build_to_hex, 627 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 628 [seq_get(args, 1), seq_get(args, 0)] 629 ), 630 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 631 [seq_get(args, 1), seq_get(args, 0)] 632 ), 633 "PARSE_TIMESTAMP": _build_parse_timestamp, 634 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 635 [seq_get(args, 1), seq_get(args, 0)] 636 ), 637 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 638 
"REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 639 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 640 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 641 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 642 ), 643 "SHA256": lambda args: exp.SHA2Digest( 644 this=seq_get(args, 0), length=exp.Literal.number(256) 645 ), 646 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 647 "SPLIT": lambda args: exp.Split( 648 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 649 this=seq_get(args, 0), 650 expression=seq_get(args, 1) or exp.Literal.string(","), 651 ), 652 "STRPOS": exp.StrPosition.from_arg_list, 653 "TIME": _build_time, 654 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 655 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 656 "TIMESTAMP": _build_timestamp, 657 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 658 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 659 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 660 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 661 ), 662 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 663 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 664 ), 665 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 666 "TO_JSON": lambda args: exp.JSONFormat( 667 this=seq_get(args, 0), options=seq_get(args, 1), to_json=True 668 ), 669 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 670 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 671 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 672 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 673 "FROM_HEX": exp.Unhex.from_arg_list, 674 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 675 } 676 # Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function 677 FUNCTIONS.pop("SEARCH") 678 679 FUNCTION_PARSERS = { 680 **parser.Parser.FUNCTION_PARSERS, 681 "ARRAY": lambda self: self.expression( 682 exp.Array, 683 expressions=[self._parse_statement()], 684 struct_name_inheritance=True, 685 ), 686 "JSON_ARRAY": lambda self: self.expression( 687 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 688 ), 689 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 690 "PREDICT": lambda self: self._parse_ml(exp.Predict), 691 "TRANSLATE": lambda self: self._parse_translate(), 692 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 693 "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding), 694 "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml( 695 exp.GenerateEmbedding, is_text=True 696 ), 697 "VECTOR_SEARCH": lambda self: self._parse_vector_search(), 698 "FORECAST": lambda self: self._parse_ml(exp.MLForecast), 699 } 700 FUNCTION_PARSERS.pop("TRIM") 701 702 NO_PAREN_FUNCTIONS = { 703 **parser.Parser.NO_PAREN_FUNCTIONS, 704 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 705 } 706 707 NESTED_TYPE_TOKENS = { 708 *parser.Parser.NESTED_TYPE_TOKENS, 709 TokenType.TABLE, 710 } 711 712 PROPERTY_PARSERS = { 713 **parser.Parser.PROPERTY_PARSERS, 714 "NOT DETERMINISTIC": lambda self: self.expression( 715 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 716 ), 717 "OPTIONS": lambda self: self._parse_with_property(), 718 } 719 720 CONSTRAINT_PARSERS = { 721 **parser.Parser.CONSTRAINT_PARSERS, 722 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 723 } 724 725 RANGE_PARSERS = 
parser.Parser.RANGE_PARSERS.copy() 726 RANGE_PARSERS.pop(TokenType.OVERLAPS) 727 728 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 729 730 STATEMENT_PARSERS = { 731 **parser.Parser.STATEMENT_PARSERS, 732 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 733 TokenType.END: lambda self: self._parse_as_command(self._prev), 734 TokenType.FOR: lambda self: self._parse_for_in(), 735 TokenType.EXPORT: lambda self: self._parse_export_data(), 736 TokenType.DECLARE: lambda self: self._parse_declare(), 737 } 738 739 BRACKET_OFFSETS = { 740 "OFFSET": (0, False), 741 "ORDINAL": (1, False), 742 "SAFE_OFFSET": (0, True), 743 "SAFE_ORDINAL": (1, True), 744 } 745 746 def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]: 747 index = self._index 748 this = self._parse_range() 749 self._match_text_seq("DO") 750 if self._match(TokenType.COMMAND): 751 self._retreat(index) 752 return self._parse_as_command(self._prev) 753 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 754 755 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 756 this = super()._parse_table_part(schema=schema) or self._parse_number() 757 758 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 759 if isinstance(this, exp.Identifier): 760 table_name = this.name 761 while self._match(TokenType.DASH, advance=False) and self._next: 762 start = self._curr 763 while self._is_connected() and not self._match_set( 764 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 765 ): 766 self._advance() 767 768 if start == self._curr: 769 break 770 771 table_name += self._find_sql(start, self._prev) 772 773 this = exp.Identifier( 774 this=table_name, quoted=this.args.get("quoted") 775 ).update_positions(this) 776 elif isinstance(this, exp.Literal): 777 table_name = this.name 778 779 if self._is_connected() and self._parse_var(any_token=True): 780 table_name += self._prev.text 781 782 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 783 784 return this 785 786 def _parse_table_parts( 787 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 788 ) -> exp.Table: 789 table = super()._parse_table_parts( 790 schema=schema, is_db_reference=is_db_reference, wildcard=True 791 ) 792 793 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 794 if not table.catalog: 795 if table.db: 796 previous_db = table.args["db"] 797 parts = table.db.split(".") 798 if len(parts) == 2 and not table.args["db"].quoted: 799 table.set( 800 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 801 ) 802 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 803 else: 804 previous_this = table.this 805 parts = table.name.split(".") 806 if len(parts) == 2 and not table.this.quoted: 807 table.set( 808 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 809 ) 810 table.set( 811 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 812 ) 813 814 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 815 alias = table.this 816 catalog, db, this, *rest = ( 817 exp.to_identifier(p, quoted=True) 818 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 819 ) 820 821 for part in (catalog, db, this): 822 if part: 823 part.update_positions(table.this) 824 825 if rest and this: 826 this = exp.Dot.build([this, *rest]) # type: ignore 827 828 table = exp.Table( 829 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 830 ) 831 table.meta["quoted_table"] = True 832 else: 833 alias = None 834 835 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 836 # dataset, so if the project identifier is omitted we need to fix the ast so that 837 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 838 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 839 # views, because it would seem like the "catalog" part is set, when it'd actually 840 # be the region/dataset. Merging the two identifiers into a single one is done to 841 # avoid producing a 4-part Table reference, which would cause issues in the schema 842 # module, when there are 3-part table names mixed with information schema views. 843 # 844 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 845 table_parts = table.parts 846 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 847 # We need to alias the table here to avoid breaking existing qualified columns. 848 # This is expected to be safe, because if there's an actual alias coming up in 849 # the token stream, it will overwrite this one. If there isn't one, we are only 850 # exposing the name that can be used to reference the view explicitly (a no-op). 851 exp.alias_( 852 table, 853 t.cast(exp.Identifier, alias or table_parts[-1]), 854 table=True, 855 copy=False, 856 ) 857 858 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 859 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 860 line=table_parts[-2].meta.get("line"), 861 col=table_parts[-1].meta.get("col"), 862 start=table_parts[-2].meta.get("start"), 863 end=table_parts[-1].meta.get("end"), 864 ) 865 table.set("this", new_this) 866 table.set("db", seq_get(table_parts, -3)) 867 table.set("catalog", seq_get(table_parts, -4)) 868 869 return table 870 871 def _parse_column(self) -> t.Optional[exp.Expression]: 872 column = super()._parse_column() 873 if isinstance(column, exp.Column): 874 parts = column.parts 875 if any("." in p.name for p in parts): 876 catalog, db, table, this, *rest = ( 877 exp.to_identifier(p, quoted=True) 878 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 879 ) 880 881 if rest and this: 882 this = exp.Dot.build([this, *rest]) # type: ignore 883 884 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 885 column.meta["quoted_column"] = True 886 887 return column 888 889 @t.overload 890 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 891 892 @t.overload 893 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
894 895 def _parse_json_object(self, agg=False): 896 json_object = super()._parse_json_object() 897 array_kv_pair = seq_get(json_object.expressions, 0) 898 899 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 900 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 901 if ( 902 array_kv_pair 903 and isinstance(array_kv_pair.this, exp.Array) 904 and isinstance(array_kv_pair.expression, exp.Array) 905 ): 906 keys = array_kv_pair.this.expressions 907 values = array_kv_pair.expression.expressions 908 909 json_object.set( 910 "expressions", 911 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 912 ) 913 914 return json_object 915 916 def _parse_bracket( 917 self, this: t.Optional[exp.Expression] = None 918 ) -> t.Optional[exp.Expression]: 919 bracket = super()._parse_bracket(this) 920 921 if isinstance(bracket, exp.Array): 922 bracket.set("struct_name_inheritance", True) 923 924 if this is bracket: 925 return bracket 926 927 if isinstance(bracket, exp.Bracket): 928 for expression in bracket.expressions: 929 name = expression.name.upper() 930 931 if name not in self.BRACKET_OFFSETS: 932 break 933 934 offset, safe = self.BRACKET_OFFSETS[name] 935 bracket.set("offset", offset) 936 bracket.set("safe", safe) 937 expression.replace(expression.expressions[0]) 938 939 return bracket 940 941 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 942 unnest = super()._parse_unnest(with_alias=with_alias) 943 944 if not unnest: 945 return None 946 947 unnest_expr = seq_get(unnest.expressions, 0) 948 if unnest_expr: 949 from sqlglot.optimizer.annotate_types import annotate_types 950 951 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 952 953 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 954 # in contrast to other dialects such as DuckDB which flattens only the array by default 955 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 956 array_elem.is_type(exp.DataType.Type.STRUCT) 957 for array_elem in unnest_expr._type.expressions 958 ): 959 unnest.set("explode_array", True) 960 961 return unnest 962 963 def _parse_make_interval(self) -> exp.MakeInterval: 964 expr = exp.MakeInterval() 965 966 for arg_key in MAKE_INTERVAL_KWARGS: 967 value = self._parse_lambda() 968 969 if not value: 970 break 971 972 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 973 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 974 if isinstance(value, exp.Kwarg): 975 arg_key = value.this.name 976 977 expr.set(arg_key, value) 978 979 self._match(TokenType.COMMA) 980 981 return expr 982 983 def _parse_ml(self, expr_type: t.Type[E], **kwargs) -> E: 984 self._match_text_seq("MODEL") 985 this = self._parse_table() 986 987 self._match(TokenType.COMMA) 988 self._match_text_seq("TABLE") 989 990 # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one 991 expression = ( 992 self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None 993 ) 994 995 self._match(TokenType.COMMA) 996 997 return self.expression( 998 expr_type, 999 this=this, 1000 expression=expression, 1001 params_struct=self._parse_bitwise(), 1002 **kwargs, 1003 ) 1004 1005 def _parse_translate(self) -> exp.Translate | exp.MLTranslate: 1006 # Check if this is ML.TRANSLATE by looking at previous tokens 1007 token = seq_get(self._tokens, self._index - 4) 1008 if token and 
token.text.upper() == "ML": 1009 return self._parse_ml(exp.MLTranslate) 1010 1011 return exp.Translate.from_arg_list(self._parse_function_args()) 1012 1013 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 1014 self._match(TokenType.TABLE) 1015 this = self._parse_table() 1016 1017 expr = self.expression(exp.FeaturesAtTime, this=this) 1018 1019 while self._match(TokenType.COMMA): 1020 arg = self._parse_lambda() 1021 1022 # Get the LHS of the Kwarg and set the arg to that value, e.g 1023 # "num_rows => 1" sets the expr's `num_rows` arg 1024 if arg: 1025 expr.set(arg.this.name, arg) 1026 1027 return expr 1028 1029 def _parse_vector_search(self) -> exp.VectorSearch: 1030 self._match(TokenType.TABLE) 1031 base_table = self._parse_table() 1032 1033 self._match(TokenType.COMMA) 1034 1035 column_to_search = self._parse_bitwise() 1036 self._match(TokenType.COMMA) 1037 1038 self._match(TokenType.TABLE) 1039 query_table = self._parse_table() 1040 1041 expr = self.expression( 1042 exp.VectorSearch, 1043 this=base_table, 1044 column_to_search=column_to_search, 1045 query_table=query_table, 1046 ) 1047 1048 while self._match(TokenType.COMMA): 1049 # query_column_to_search can be named argument or positional 1050 if self._match(TokenType.STRING, advance=False): 1051 query_column = self._parse_string() 1052 expr.set("query_column_to_search", query_column) 1053 else: 1054 arg = self._parse_lambda() 1055 if arg: 1056 expr.set(arg.this.name, arg) 1057 1058 return expr 1059 1060 def _parse_export_data(self) -> exp.Export: 1061 self._match_text_seq("DATA") 1062 1063 return self.expression( 1064 exp.Export, 1065 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 1066 options=self._parse_properties(), 1067 this=self._match_text_seq("AS") and self._parse_select(), 1068 ) 1069 1070 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 1071 this = super()._parse_column_ops(this) 1072 1073 if isinstance(this, exp.Dot): 1074 prefix_name = this.this.name.upper() 1075 func_name = this.name.upper() 1076 if prefix_name == "NET": 1077 if func_name == "HOST": 1078 this = self.expression( 1079 exp.NetHost, this=seq_get(this.expression.expressions, 0) 1080 ) 1081 elif prefix_name == "SAFE": 1082 if func_name == "TIMESTAMP": 1083 this = _build_timestamp(this.expression.expressions) 1084 this.set("safe", True) 1085 1086 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
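The options above are usually supplied through sqlglot's top-level helpers rather than by constructing the dialect's Parser directly. A minimal sketch of how they reach the BigQuery parser, assuming a recent sqlglot release (the exact output string may vary between versions):

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # Keyword arguments such as error_level are forwarded to the BigQuery Parser.
    expression = sqlglot.parse_one(
        "SELECT SPLIT(col) FROM tbl",
        read="bigquery",
        error_level=ErrorLevel.RAISE,
    )

    # Per the FUNCTIONS mapping above, SPLIT without a delimiter should default to ','.
    print(expression.sql(dialect="bigquery"))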
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
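Before moving on to the generator, note that the table-name handling implemented in _parse_table_parts above (re-assembling dashed project names and merging INFORMATION_SCHEMA.X into a single quoted identifier) can be observed directly on parsed trees. A small sketch, assuming current sqlglot behavior; the exact identifier quoting in the output may differ between versions:

    import sqlglot
    from sqlglot import exp

    # Dashed project names are folded back into a single identifier per table part.
    tree = sqlglot.parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery")
    print(tree.sql(dialect="bigquery"))

    # Without a project qualifier, INFORMATION_SCHEMA.X should be kept as one quoted
    # identifier so the dataset is not mistaken for a catalog during qualification.
    tree = sqlglot.parse_one("SELECT * FROM mydataset.INFORMATION_SCHEMA.TABLES", read="bigquery")
    print([part.name for part in tree.find(exp.Table).parts])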
1088 class Generator(generator.Generator): 1089 INTERVAL_ALLOWS_PLURAL_FORM = False 1090 JOIN_HINTS = False 1091 QUERY_HINTS = False 1092 TABLE_HINTS = False 1093 LIMIT_FETCH = "LIMIT" 1094 RENAME_TABLE_WITH_DB = False 1095 NVL2_SUPPORTED = False 1096 UNNEST_WITH_ORDINALITY = False 1097 COLLATE_IS_FUNC = True 1098 LIMIT_ONLY_LITERALS = True 1099 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1100 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1101 JSON_KEY_VALUE_PAIR_SEP = "," 1102 NULL_ORDERING_SUPPORTED = False 1103 IGNORE_NULLS_IN_FUNC = True 1104 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1105 CAN_IMPLEMENT_ARRAY_ANY = True 1106 SUPPORTS_TO_NUMBER = False 1107 NAMED_PLACEHOLDER_TOKEN = "@" 1108 HEX_FUNC = "TO_HEX" 1109 WITH_PROPERTIES_PREFIX = "OPTIONS" 1110 SUPPORTS_EXPLODING_PROJECTIONS = False 1111 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1112 SUPPORTS_UNIX_SECONDS = True 1113 1114 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1115 1116 TS_OR_DS_TYPES = ( 1117 exp.TsOrDsToDatetime, 1118 exp.TsOrDsToTimestamp, 1119 exp.TsOrDsToTime, 1120 exp.TsOrDsToDate, 1121 ) 1122 1123 TRANSFORMS = { 1124 **generator.Generator.TRANSFORMS, 1125 exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"), 1126 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1127 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1128 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1129 exp.Array: inline_array_unless_query, 1130 exp.ArrayContains: _array_contains_sql, 1131 exp.ArrayFilter: filter_array_using_unnest, 1132 exp.ArrayRemove: filter_array_using_unnest, 1133 exp.BitwiseAndAgg: rename_func("BIT_AND"), 1134 exp.BitwiseOrAgg: rename_func("BIT_OR"), 1135 exp.BitwiseXorAgg: rename_func("BIT_XOR"), 1136 exp.BitwiseCount: rename_func("BIT_COUNT"), 1137 exp.ByteLength: rename_func("BYTE_LENGTH"), 1138 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1139 exp.CollateProperty: lambda self, e: ( 1140 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1141 if e.args.get("default") 1142 else f"COLLATE {self.sql(e, 'this')}" 1143 ), 1144 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1145 exp.CountIf: rename_func("COUNTIF"), 1146 exp.Create: _create_sql, 1147 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1148 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1149 exp.DateDiff: lambda self, e: self.func( 1150 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1151 ), 1152 exp.DateFromParts: rename_func("DATE"), 1153 exp.DateStrToDate: datestrtodate_sql, 1154 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1155 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1156 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1157 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1158 exp.FromTimeZone: lambda self, e: self.func( 1159 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1160 ), 1161 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1162 exp.GroupConcat: lambda self, e: groupconcat_sql( 1163 self, e, func_name="STRING_AGG", within_group=False, sep=None 1164 ), 1165 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1166 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1167 exp.If: if_sql(false_value="NULL"), 1168 exp.ILike: no_ilike_sql, 1169 exp.IntDiv: rename_func("DIV"), 1170 exp.Int64: rename_func("INT64"), 1171 exp.JSONBool: rename_func("BOOL"), 1172 exp.JSONExtract: _json_extract_sql, 1173 exp.JSONExtractArray: _json_extract_sql, 1174 exp.JSONExtractScalar: 
_json_extract_sql, 1175 exp.JSONFormat: lambda self, e: self.func( 1176 "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING", 1177 e.this, 1178 e.args.get("options"), 1179 ), 1180 exp.JSONKeysAtDepth: rename_func("JSON_KEYS"), 1181 exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"), 1182 exp.Levenshtein: _levenshtein_sql, 1183 exp.Max: max_or_greatest, 1184 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1185 exp.MD5Digest: rename_func("MD5"), 1186 exp.Min: min_or_least, 1187 exp.Normalize: lambda self, e: self.func( 1188 "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE", 1189 e.this, 1190 e.args.get("form"), 1191 ), 1192 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1193 exp.RegexpExtract: lambda self, e: self.func( 1194 "REGEXP_EXTRACT", 1195 e.this, 1196 e.expression, 1197 e.args.get("position"), 1198 e.args.get("occurrence"), 1199 ), 1200 exp.RegexpExtractAll: lambda self, e: self.func( 1201 "REGEXP_EXTRACT_ALL", e.this, e.expression 1202 ), 1203 exp.RegexpReplace: regexp_replace_sql, 1204 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1205 exp.ReturnsProperty: _returnsproperty_sql, 1206 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1207 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1208 exp.ParseDatetime: lambda self, e: self.func( 1209 "PARSE_DATETIME", self.format_time(e), e.this 1210 ), 1211 exp.Select: transforms.preprocess( 1212 [ 1213 transforms.explode_projection_to_unnest(), 1214 transforms.unqualify_unnest, 1215 transforms.eliminate_distinct_on, 1216 _alias_ordered_group, 1217 transforms.eliminate_semi_and_anti_joins, 1218 ] 1219 ), 1220 exp.SHA: rename_func("SHA1"), 1221 exp.SHA2: sha256_sql, 1222 exp.SHA1Digest: rename_func("SHA1"), 1223 exp.SHA2Digest: sha2_digest_sql, 1224 exp.StabilityProperty: lambda self, e: ( 1225 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1226 ), 1227 exp.String: rename_func("STRING"), 1228 exp.StrPosition: lambda self, e: ( 1229 strposition_sql( 1230 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1231 ) 1232 ), 1233 exp.StrToDate: _str_to_datetime_sql, 1234 exp.StrToTime: _str_to_datetime_sql, 1235 exp.SessionUser: lambda *_: "SESSION_USER()", 1236 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1237 exp.TimeFromParts: rename_func("TIME"), 1238 exp.TimestampFromParts: rename_func("DATETIME"), 1239 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1240 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1241 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1242 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1243 exp.TimeStrToTime: timestrtotime_sql, 1244 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1245 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1246 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1247 exp.TsOrDsToTime: rename_func("TIME"), 1248 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1249 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1250 exp.Unhex: rename_func("FROM_HEX"), 1251 exp.UnixDate: rename_func("UNIX_DATE"), 1252 exp.UnixToTime: _unix_to_time_sql, 1253 exp.Uuid: lambda *_: "GENERATE_UUID()", 1254 exp.Values: _derived_table_values_to_unnest, 1255 exp.VariancePop: rename_func("VAR_POP"), 1256 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1257 } 1258 1259 SUPPORTED_JSON_PATH_PARTS = { 1260 exp.JSONPathKey, 1261 exp.JSONPathRoot, 1262 exp.JSONPathSubscript, 1263 } 1264 1265 TYPE_MAPPING = { 1266 **generator.Generator.TYPE_MAPPING, 1267 
exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1268 exp.DataType.Type.BIGINT: "INT64", 1269 exp.DataType.Type.BINARY: "BYTES", 1270 exp.DataType.Type.BLOB: "BYTES", 1271 exp.DataType.Type.BOOLEAN: "BOOL", 1272 exp.DataType.Type.CHAR: "STRING", 1273 exp.DataType.Type.DECIMAL: "NUMERIC", 1274 exp.DataType.Type.DOUBLE: "FLOAT64", 1275 exp.DataType.Type.FLOAT: "FLOAT64", 1276 exp.DataType.Type.INT: "INT64", 1277 exp.DataType.Type.NCHAR: "STRING", 1278 exp.DataType.Type.NVARCHAR: "STRING", 1279 exp.DataType.Type.SMALLINT: "INT64", 1280 exp.DataType.Type.TEXT: "STRING", 1281 exp.DataType.Type.TIMESTAMP: "DATETIME", 1282 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1283 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1284 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1285 exp.DataType.Type.TINYINT: "INT64", 1286 exp.DataType.Type.ROWVERSION: "BYTES", 1287 exp.DataType.Type.UUID: "STRING", 1288 exp.DataType.Type.VARBINARY: "BYTES", 1289 exp.DataType.Type.VARCHAR: "STRING", 1290 exp.DataType.Type.VARIANT: "ANY TYPE", 1291 } 1292 1293 PROPERTIES_LOCATION = { 1294 **generator.Generator.PROPERTIES_LOCATION, 1295 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1296 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1297 } 1298 1299 # WINDOW comes after QUALIFY 1300 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1301 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1302 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1303 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1304 } 1305 1306 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1307 RESERVED_KEYWORDS = { 1308 "all", 1309 "and", 1310 "any", 1311 "array", 1312 "as", 1313 "asc", 1314 "assert_rows_modified", 1315 "at", 1316 "between", 1317 "by", 1318 "case", 1319 "cast", 1320 "collate", 1321 "contains", 1322 "create", 1323 "cross", 1324 "cube", 1325 "current", 1326 "default", 1327 "define", 1328 "desc", 1329 "distinct", 1330 "else", 1331 "end", 1332 "enum", 1333 "escape", 1334 "except", 1335 "exclude", 1336 "exists", 1337 "extract", 1338 "false", 1339 "fetch", 1340 "following", 1341 "for", 1342 "from", 1343 "full", 1344 "group", 1345 "grouping", 1346 "groups", 1347 "hash", 1348 "having", 1349 "if", 1350 "ignore", 1351 "in", 1352 "inner", 1353 "intersect", 1354 "interval", 1355 "into", 1356 "is", 1357 "join", 1358 "lateral", 1359 "left", 1360 "like", 1361 "limit", 1362 "lookup", 1363 "merge", 1364 "natural", 1365 "new", 1366 "no", 1367 "not", 1368 "null", 1369 "nulls", 1370 "of", 1371 "on", 1372 "or", 1373 "order", 1374 "outer", 1375 "over", 1376 "partition", 1377 "preceding", 1378 "proto", 1379 "qualify", 1380 "range", 1381 "recursive", 1382 "respect", 1383 "right", 1384 "rollup", 1385 "rows", 1386 "select", 1387 "set", 1388 "some", 1389 "struct", 1390 "tablesample", 1391 "then", 1392 "to", 1393 "treat", 1394 "true", 1395 "unbounded", 1396 "union", 1397 "unnest", 1398 "using", 1399 "when", 1400 "where", 1401 "window", 1402 "with", 1403 "within", 1404 } 1405 1406 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1407 unit = expression.unit 1408 unit_sql = unit.name if unit.is_string else self.sql(unit) 1409 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1410 1411 def mod_sql(self, expression: exp.Mod) -> str: 1412 this = expression.this 1413 expr = expression.expression 1414 return self.func( 1415 "MOD", 1416 this.unnest() if isinstance(this, exp.Paren) 
else this, 1417 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1418 ) 1419 1420 def column_parts(self, expression: exp.Column) -> str: 1421 if expression.meta.get("quoted_column"): 1422 # If a column reference is of the form `dataset.table`.name, we need 1423 # to preserve the quoted table path, otherwise the reference breaks 1424 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1425 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1426 return f"{table_path}.{self.sql(expression, 'this')}" 1427 1428 return super().column_parts(expression) 1429 1430 def table_parts(self, expression: exp.Table) -> str: 1431 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1432 # we need to make sure the correct quoting is used in each case. 1433 # 1434 # For example, if there is a CTE x that clashes with a schema name, then the former will 1435 # return the table y in that schema, whereas the latter will return the CTE's y column: 1436 # 1437 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1438 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1439 if expression.meta.get("quoted_table"): 1440 table_parts = ".".join(p.name for p in expression.parts) 1441 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1442 1443 return super().table_parts(expression) 1444 1445 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1446 this = expression.this 1447 if isinstance(this, exp.TsOrDsToDatetime): 1448 func_name = "FORMAT_DATETIME" 1449 elif isinstance(this, exp.TsOrDsToTimestamp): 1450 func_name = "FORMAT_TIMESTAMP" 1451 elif isinstance(this, exp.TsOrDsToTime): 1452 func_name = "FORMAT_TIME" 1453 else: 1454 func_name = "FORMAT_DATE" 1455 1456 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1457 return self.func( 1458 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1459 ) 1460 1461 def eq_sql(self, expression: exp.EQ) -> str: 1462 # Operands of = cannot be NULL in BigQuery 1463 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1464 if not isinstance(expression.parent, exp.Update): 1465 return "NULL" 1466 1467 return self.binary(expression, "=") 1468 1469 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1470 parent = expression.parent 1471 1472 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1473 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1474 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1475 return self.func( 1476 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1477 ) 1478 1479 return super().attimezone_sql(expression) 1480 1481 def trycast_sql(self, expression: exp.TryCast) -> str: 1482 return self.cast_sql(expression, safe_prefix="SAFE_") 1483 1484 def bracket_sql(self, expression: exp.Bracket) -> str: 1485 this = expression.this 1486 expressions = expression.expressions 1487 1488 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1489 arg = expressions[0] 1490 if arg.type is None: 1491 from sqlglot.optimizer.annotate_types import annotate_types 1492 1493 arg = annotate_types(arg, dialect=self.dialect) 1494 1495 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1496 # BQ doesn't support bracket syntax with string values for structs 1497 return f"{self.sql(this)}.{arg.name}" 1498 1499 expressions_sql = self.expressions(expression, flat=True) 1500 offset = expression.args.get("offset") 1501 1502 if offset == 0: 1503 expressions_sql = f"OFFSET({expressions_sql})" 1504 elif offset == 1: 1505 expressions_sql = f"ORDINAL({expressions_sql})" 1506 elif offset is not None: 1507 self.unsupported(f"Unsupported array offset: {offset}") 1508 1509 if expression.args.get("safe"): 1510 expressions_sql = f"SAFE_{expressions_sql}" 1511 1512 return f"{self.sql(this)}[{expressions_sql}]" 1513 1514 def in_unnest_op(self, expression: exp.Unnest) -> str: 1515 return self.sql(expression) 1516 1517 def version_sql(self, expression: exp.Version) -> str: 1518 if expression.name == "TIMESTAMP": 1519 expression.set("this", "SYSTEM_TIME") 1520 return super().version_sql(expression) 1521 1522 def contains_sql(self, expression: exp.Contains) -> str: 1523 this = expression.this 1524 expr = expression.expression 1525 1526 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1527 this = this.this 1528 expr = expr.this 1529 1530 return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1531 1532 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1533 this = expression.this 1534 1535 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1536 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1537 # because they aren't literals and so the above syntax is invalid BigQuery. 1538 if isinstance(this, exp.Array): 1539 elem = seq_get(this.expressions, 0) 1540 if not (elem and elem.find(exp.Query)): 1541 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1542 1543 return super().cast_sql(expression, safe_prefix=safe_prefix) 1544 1545 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1546 variables = self.expressions(expression, "this") 1547 default = self.sql(expression, "default") 1548 default = f" DEFAULT {default}" if default else "" 1549 kind = self.sql(expression, "kind") 1550 kind = f" {kind}" if kind else "" 1551 1552 return f"{variables}{kind}{default}" 1553 1554 def timestamp_sql(self, expression: exp.Timestamp) -> str: 1555 prefix = "SAFE." if expression.args.get("safe") else "" 1556 return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
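These options are normally passed through Expression.sql or the top-level transpile helper rather than by constructing the Generator directly. A minimal sketch, assuming a recent sqlglot release (the generated SQL shown is illustrative and may vary between versions):

    import sqlglot

    expression = sqlglot.parse_one("SELECT a FROM tbl WHERE b = 1", read="bigquery")

    # Generator options such as pretty and identify are forwarded via .sql().
    print(expression.sql(dialect="bigquery", pretty=True, identify=True))

    # transpile() parses with the source dialect and generates with the target dialect;
    # unsupported constructs are reported per unsupported_level (ErrorLevel.WARN by default).
    print(sqlglot.transpile("SELECT TIMESTAMP_MILLIS(1)", read="bigquery", write="duckdb")[0])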
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- UPDATE_STATEMENT_SUPPORTS_FROM
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql